{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns"
],
"metadata": {
"id": "SHZ00Nm0lEdm"
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "G0qlWsRrk4DV",
"outputId": "f880bcf9-be1e-4523-eb5c-6cd5e1a75eeb"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2023-01-09 18:45:34-- https://files.grouplens.org/datasets/movielens/ml-latest-small.zip\n",
"Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152\n",
"Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 978202 (955K) [application/zip]\n",
"Saving to: ‘ml-latest-small.zip’\n",
"\n",
"ml-latest-small.zip 100%[===================>] 955.28K 4.10MB/s in 0.2s \n",
"\n",
"2023-01-09 18:45:35 (4.10 MB/s) - ‘ml-latest-small.zip’ saved [978202/978202]\n",
"\n",
"Archive: ml-latest-small.zip\n",
" creating: ml-latest-small/\n",
" inflating: ml-latest-small/links.csv \n",
" inflating: ml-latest-small/tags.csv \n",
" inflating: ml-latest-small/ratings.csv \n",
" inflating: ml-latest-small/README.txt \n",
" inflating: ml-latest-small/movies.csv \n"
]
}
],
"source": [
"# Fetch and unpack the MovieLens \"latest small\" archive.\n",
"# -nc skips the download when the archive is already present, and -o lets unzip\n",
"# overwrite earlier extractions without prompting, so the cell can be re-run safely.\n",
"!wget -nc https://files.grouplens.org/datasets/movielens/ml-latest-small.zip\n",
"!unzip -o ml-latest-small.zip"
]
},
{
"cell_type": "markdown",
"source": [
"### Loading the data"
],
"metadata": {
"id": "xNGwDVaHoIHy"
}
},
{
"cell_type": "code",
"source": [
"# Read the two MovieLens tables used in this notebook into DataFrames.\n",
"ratings = pd.read_csv('ml-latest-small/ratings.csv')\n",
"movies = pd.read_csv('ml-latest-small/movies.csv')\n"
],
"metadata": {
"id": "C0L0UiZdnjDw"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Investigating the data"
],
"metadata": {
"id": "1iDomU7ToLzX"
}
},
{
"cell_type": "code",
"source": [
"# Display five randomly chosen rows of the movies table.\n",
"movies.sample(n=5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "tXM3A7pSn9yM",
"outputId": "7a77efc9-4b8a-429d-d6e8-fb7c892293f3"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" movieId title genres\n",
"2086 2772 Detroit Rock City (1999) Comedy\n",
"82 93 Vampire in Brooklyn (1995) Comedy|Horror|Romance\n",
"5288 8743 Biggles (1986) Adventure|Fantasy|Sci-Fi\n",
"1254 1667 Mad City (1997) Action|Drama\n",
"6050 40478 Night of the Lepus (1972) Horror|Sci-Fi|Thriller"
],
"text/html": [
"\n",
"
\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" movieId | \n",
" title | \n",
" genres | \n",
"
\n",
" \n",
" \n",
" \n",
" 2086 | \n",
" 2772 | \n",
" Detroit Rock City (1999) | \n",
" Comedy | \n",
"
\n",
" \n",
" 82 | \n",
" 93 | \n",
" Vampire in Brooklyn (1995) | \n",
" Comedy|Horror|Romance | \n",
"
\n",
" \n",
" 5288 | \n",
" 8743 | \n",
" Biggles (1986) | \n",
" Adventure|Fantasy|Sci-Fi | \n",
"
\n",
" \n",
" 1254 | \n",
" 1667 | \n",
" Mad City (1997) | \n",
" Action|Drama | \n",
"
\n",
" \n",
" 6050 | \n",
" 40478 | \n",
" Night of the Lepus (1972) | \n",
" Horror|Sci-Fi|Thriller | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"# Display five randomly chosen rows of the ratings table.\n",
"ratings.sample(n=5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "0YLXE3XioD-k",
"outputId": "e8de47f3-cda4-4126-d250-f713757d6bd9"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" userId movieId rating timestamp\n",
"39958 274 5076 3.0 1172275318\n",
"31813 219 4643 2.5 1194932356\n",
"100472 610 81520 4.0 1479543207\n",
"99812 610 4232 3.5 1479542821\n",
"22462 153 1954 0.5 1525552646"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" userId | \n",
" movieId | \n",
" rating | \n",
" timestamp | \n",
"
\n",
" \n",
" \n",
" \n",
" 39958 | \n",
" 274 | \n",
" 5076 | \n",
" 3.0 | \n",
" 1172275318 | \n",
"
\n",
" \n",
" 31813 | \n",
" 219 | \n",
" 4643 | \n",
" 2.5 | \n",
" 1194932356 | \n",
"
\n",
" \n",
" 100472 | \n",
" 610 | \n",
" 81520 | \n",
" 4.0 | \n",
" 1479543207 | \n",
"
\n",
" \n",
" 99812 | \n",
" 610 | \n",
" 4232 | \n",
" 3.5 | \n",
" 1479542821 | \n",
"
\n",
" \n",
" 22462 | \n",
" 153 | \n",
" 1954 | \n",
" 0.5 | \n",
" 1525552646 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
"ratings.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VWQCoAqsoTfm",
"outputId": "f3e90847-3c34-4d20-f830-ca3b73023239"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"RangeIndex: 100836 entries, 0 to 100835\n",
"Data columns (total 4 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 userId 100836 non-null int64 \n",
" 1 movieId 100836 non-null int64 \n",
" 2 rating 100836 non-null float64\n",
" 3 timestamp 100836 non-null int64 \n",
"dtypes: float64(1), int64(3)\n",
"memory usage: 3.1 MB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"ratings.describe()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
},
"id": "8ZFMA9uyoYO_",
"outputId": "d51c594d-7f56-44bc-d330-f9f104e26e0d"
},
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" userId movieId rating timestamp\n",
"count 100836.000000 100836.000000 100836.000000 1.008360e+05\n",
"mean 326.127564 19435.295718 3.501557 1.205946e+09\n",
"std 182.618491 35530.987199 1.042529 2.162610e+08\n",
"min 1.000000 1.000000 0.500000 8.281246e+08\n",
"25% 177.000000 1199.000000 3.000000 1.019124e+09\n",
"50% 325.000000 2991.000000 3.500000 1.186087e+09\n",
"75% 477.000000 8122.000000 4.000000 1.435994e+09\n",
"max 610.000000 193609.000000 5.000000 1.537799e+09"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" userId | \n",
" movieId | \n",
" rating | \n",
" timestamp | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 100836.000000 | \n",
" 100836.000000 | \n",
" 100836.000000 | \n",
" 1.008360e+05 | \n",
"
\n",
" \n",
" mean | \n",
" 326.127564 | \n",
" 19435.295718 | \n",
" 3.501557 | \n",
" 1.205946e+09 | \n",
"
\n",
" \n",
" std | \n",
" 182.618491 | \n",
" 35530.987199 | \n",
" 1.042529 | \n",
" 2.162610e+08 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.500000 | \n",
" 8.281246e+08 | \n",
"
\n",
" \n",
" 25% | \n",
" 177.000000 | \n",
" 1199.000000 | \n",
" 3.000000 | \n",
" 1.019124e+09 | \n",
"
\n",
" \n",
" 50% | \n",
" 325.000000 | \n",
" 2991.000000 | \n",
" 3.500000 | \n",
" 1.186087e+09 | \n",
"
\n",
" \n",
" 75% | \n",
" 477.000000 | \n",
" 8122.000000 | \n",
" 4.000000 | \n",
" 1.435994e+09 | \n",
"
\n",
" \n",
" max | \n",
" 610.000000 | \n",
" 193609.000000 | \n",
" 5.000000 | \n",
" 1.537799e+09 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 10
}
]
},
{
"cell_type": "markdown",
"source": [
"As the timestamp is not useful for our task, we drop it from our dataset"
],
"metadata": {
"id": "znX4JdJjokbG"
}
},
{
"cell_type": "code",
"source": [
"# Keep every column except 'timestamp'; `ratings` itself is left unchanged.\n",
"ratings1 = ratings.drop(columns='timestamp')"
],
"metadata": {
"id": "1TvD38Njojfu"
},
"execution_count": 11,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Building the rating matrix from the dataset"
],
"metadata": {
"id": "7cBnwsw6pSOv"
}
},
{
"cell_type": "code",
"source": [
"# Reshape the long ratings table into one row per user and one column per movie.\n",
"# User-movie pairs that have no rating come out of the pivot as NaN and are stored as 0.\n",
"rating_matrix = (\n",
"    ratings1\n",
"    .pivot(index='userId', columns='movieId', values='rating')\n",
"    .fillna(0)\n",
")"
],
"metadata": {
"id": "15ULmzG4pbDo"
},
"execution_count": 12,
"outputs": []
},
{
"cell_type": "code",
"source": [
"rating_matrix"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 455
},
"id": "GvG6MTXdqe8e",
"outputId": "7fa62fae-2a48-4a5a-8a36-a0af08ee443a"
},
"execution_count": 13,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"movieId 1 2 3 4 5 6 7 8 \\\n",
"userId \n",
"1 4.0 0.0 4.0 0.0 0.0 4.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"5 4.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... ... ... ... \n",
"606 2.5 0.0 0.0 0.0 0.0 0.0 2.5 0.0 \n",
"607 4.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"608 2.5 2.0 2.0 0.0 0.0 0.0 0.0 0.0 \n",
"609 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"610 5.0 0.0 0.0 0.0 0.0 5.0 0.0 0.0 \n",
"\n",
"movieId 9 10 ... 193565 193567 193571 193573 193579 193581 \\\n",
"userId ... \n",
"1 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"5 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... ... ... ... ... \n",
"606 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"607 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"608 0.0 4.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"609 0.0 4.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"610 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
"movieId 193583 193585 193587 193609 \n",
"userId \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"5 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"606 0.0 0.0 0.0 0.0 \n",
"607 0.0 0.0 0.0 0.0 \n",
"608 0.0 0.0 0.0 0.0 \n",
"609 0.0 0.0 0.0 0.0 \n",
"610 0.0 0.0 0.0 0.0 \n",
"\n",
"[610 rows x 9724 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" movieId | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" 10 | \n",
" ... | \n",
" 193565 | \n",
" 193567 | \n",
" 193571 | \n",
" 193573 | \n",
" 193579 | \n",
" 193581 | \n",
" 193583 | \n",
" 193585 | \n",
" 193587 | \n",
" 193609 | \n",
"
\n",
" \n",
" userId | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 4.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 5 | \n",
" 4.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 606 | \n",
" 2.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 607 | \n",
" 4.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 608 | \n",
" 2.5 | \n",
" 2.0 | \n",
" 2.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 609 | \n",
" 3.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 610 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
610 rows × 9724 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "markdown",
"source": [
"### Analyzing the sparsity of the rating matrix\n",
"\n",
"We do it in 2 ways:\n",
"\n",
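"First, without using the matrix. Here \"sparsity\" is measured as the share of user-movie pairs that actually have a rating: `sparsity = ratings / (users * items)`. (Strictly speaking this ratio is the density of the matrix; the share of empty cells is one minus it.)"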
],
"metadata": {
"id": "wBBLH1oSrc5l"
}
},
{
"cell_type": "code",
"source": [
"# Count distinct users and movies straight from the long ratings table.\n",
"number_users = ratings['userId'].nunique()\n",
"number_films = ratings['movieId'].nunique()\n",
"number_ratings = len(ratings)\n",
"\n",
"# Number of ratings divided by the number of cells of a users x movies grid.\n",
"sparsity = number_ratings / (number_users * number_films)\n",
"\n",
"print(f'Sparsity: {sparsity}')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tuD9RDNKq1AB",
"outputId": "1c865a5e-73c4-4142-a4ac-efc012f1638e"
},
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Sparsity: 0.016999683055613623\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Second, directly from the matrix"
],
"metadata": {
"id": "h3abD7agub3L"
}
},
{
"cell_type": "code",
"source": [
"def calculate_sparsity(A):\n",
"    \"\"\"\n",
"    Return the share of entries of A that are non-zero.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    A : array\n",
"        Matrix whose entries are inspected.\n",
"\n",
"    Returns\n",
"    -------\n",
"    float\n",
"        Number of non-zero entries of A divided by its total number of entries.\n",
"    \"\"\"\n",
"    filled_cells = np.count_nonzero(A)\n",
"    total_cells = np.size(A)\n",
"    return filled_cells / total_cells"
],
"metadata": {
"id": "Xx0J9GypufzB"
},
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Measure computed directly from the entries of the pivoted rating matrix.\n",
"matrix_values = np.array(rating_matrix)\n",
"print(f'Sparsity: {calculate_sparsity(matrix_values)}')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "n94d4D9zwBmO",
"outputId": "7375cda2-60c3-49d6-af66-ff5ee0b4c107"
},
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Sparsity: 0.016999683055613623\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### Distribution of popularity of the movies"
],
"metadata": {
"id": "WmPoV4Nixbp0"
}
},
{
"cell_type": "code",
"source": [
"# NOTE(review): this rebinds `movies`, replacing the DataFrame read from movies.csv\n",
"# with an array of the distinct movie ids found in `ratings`; the binding is kept\n",
"# so that any later cell relying on it keeps working.\n",
"movies = ratings['movieId'].unique()\n",
"\n",
"# Count the ratings of every movie in a single vectorised pass instead of filtering\n",
"# the whole ratings table once per movie. `popularity` stays a plain list of ints,\n",
"# ordered like `movies`.\n",
"popularity = ratings['movieId'].value_counts().reindex(movies).tolist()"
],
"metadata": {
"id": "BBeKHHeVxowt"
},
"execution_count": 17,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Histogram of how many ratings each movie received, drawn on an explicit Axes.\n",
"fig, ax = plt.subplots(figsize=(10, 8))\n",
"ax.hist(popularity)\n",
"ax.set_title('Distribution of movies by number of ratings received')\n",
"ax.set_xlabel('Popularity (Number of rates received)')\n",
"ax.set_ylabel('Number of movies')"
],
"metadata": {
"id": "4VSRkhxgydym",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 530
},
"outputId": "e53f2b78-4ac1-4bbc-a5b1-9dccb813b4b7"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0, 0.5, 'Number of movies')"
]
},
"metadata": {},
"execution_count": 36
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"