import functions_framework
from google.cloud import bigquery, storage
import pandas as pd
import pickle
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import NMF
import numpy as np

@functions_framework.http
def main(request):
    # Initialize the BigQuery and Cloud Storage clients
    client = bigquery.Client()
    storage_client = storage.Client()

    # Aggregate foreground time per user and app; the log transform dampens
    # the influence of the most heavily used apps on the similarity scores
    query = """
        SELECT user_id, package_name, LOG(SUM(foreground_time_ms)) AS weight
        FROM `data604-project-g3.Footprint_data.app_usage`
        GROUP BY user_id, package_name
        ORDER BY user_id
    """

    # Run the query and load the result into a DataFrame
    df = client.query(query).to_dataframe()
    # Prepare the user-item matrix (rows: users, columns: apps, values: weights)
    user_item_matrix = df.pivot(index='user_id', columns='package_name', values='weight').fillna(0)

    # Calculate user-user similarity using cosine similarity
    similarity_matrix = cosine_similarity(user_item_matrix, user_item_matrix)

    # For each user, record the most similar other user; the highest score in each
    # row is the user itself, so take the second-highest entry of the sorted row
    similarity_dict = {}
    for target_user_id in user_item_matrix.index:
        target_user_idx = user_item_matrix.index.get_loc(target_user_id)
        similar_user_idx = similarity_matrix[target_user_idx].argsort()[-2]
        most_similar_user_id = user_item_matrix.index[similar_user_idx]
        similarity_dict[target_user_id] = most_similar_user_id
    # Persist the similarity lookup and the raw usage data for the serving function
    with storage_client.bucket('data604-project-g3-data-free').blob("ml/collaborative_filtering/explain.pkl").open("wb") as file:
        pickle.dump(similarity_dict, file)
    with storage_client.bucket('data604-project-g3-data-free').blob("ml/collaborative_filtering/data.pkl").open("wb") as file:
        pickle.dump(df, file)
    # Factorize the user-item matrix with NMF and reconstruct it; the reconstruction
    # fills in estimated weights for apps each user has not used yet
    model = NMF(n_components=5)
    A = model.fit_transform(user_item_matrix)
    B = model.components_
    user_item_matrix_estimated = np.dot(A, B)
    with storage_client.bucket('data604-project-g3-data-free').blob("ml/collaborative_filtering/user_item_matrix.pkl").open("wb") as file:
        pickle.dump(user_item_matrix_estimated, file)
    # Save the row (user) and column (app) labels so the serving function can map
    # ids back to positions in the estimated matrix
    with storage_client.bucket('data604-project-g3-public-free').blob("rec_users.txt").open("w") as file:
        for user in user_item_matrix.index.tolist():
            file.write(f"{user}\n")
    with storage_client.bucket('data604-project-g3-public-free').blob("rec_apps.txt").open("w") as file:
        for app in user_item_matrix.columns.tolist():
            file.write(f"{app}\n")

    return {"status": "Success"}
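
# Second HTTP Cloud Function: serves recommendations from the artifacts written
# above. It redefines main and re-imports its dependencies, so it is presumably
# deployed from its own source file.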
import functions_framework
import pandas as pd
import pickle
from google.cloud import bigquery, storage
import numpy as np
import json
from sklearn.metrics.pairwise import cosine_similarity

def get_arg(arg_name, request_json, request_args):
    # Read a parameter from the JSON body if present, otherwise from the query string
    if request_json and arg_name in request_json:
        val = request_json[arg_name]
    elif request_args and arg_name in request_args:
        val = request_args[arg_name]
    else:
        val = None
    return val

@functions_framework.http
def main(request):
    # Answer CORS preflight requests before loading any data
    if request.method == 'OPTIONS':
        # Allow GET requests from any origin with the Content-Type header
        # and cache the preflight response for 3600 seconds
        headers = {
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Methods': 'GET',
            'Access-Control-Allow-Headers': 'Content-Type',
            'Access-Control-Max-Age': '3600'
        }
        return ('', 204, headers)

    request_json = request.get_json(silent=True)
    request_args = request.args
    storage_client = storage.Client()

    # Load the artifacts produced by the training function
    with storage_client.bucket('data604-project-g3-data-free').blob("ml/collaborative_filtering/data.pkl").open("rb") as file:
        df = pickle.load(file)
    with storage_client.bucket('data604-project-g3-data-free').blob("ml/collaborative_filtering/explain.pkl").open("rb") as file:
        similarity_dict = pickle.load(file)
    with storage_client.bucket('data604-project-g3-data-free').blob("ml/collaborative_filtering/user_item_matrix.pkl").open("rb") as file:
        user_item_matrix = pickle.load(file)
    with storage_client.bucket('data604-project-g3-public-free').blob("rec_users.txt").open("r") as file:
        users = [row for row in file.read().split("\n") if len(row)]
    with storage_client.bucket('data604-project-g3-public-free').blob("rec_apps.txt").open("r") as file:
        apps = [row for row in file.read().split("\n") if len(row)]
    # Identify the apps the requested user has already used
    user_id = get_arg('user_id', request_json, request_args)
    apps_seen = df[df['user_id'] == user_id]['package_name'].tolist()
    unseen_apps = [app for app in apps if app not in apps_seen]

    # Look up the most similar user, returned as the explanation for the recommendations
    most_similar_user_id = similarity_dict.get(user_id)

    # Score the unseen apps with the user's row of the NMF-reconstructed matrix
    if user_id in users:
        recommendations = user_item_matrix[users.index(user_id)]
        score_list = []
        for app in unseen_apps:
            # Cast the numpy score to a plain float so it is JSON serializable
            score_list.append([app, float(recommendations[apps.index(app)])])
        top_n = sorted(score_list, key=lambda x: -x[1])
    else:
        top_n = []
    # Set CORS headers for the main response
    headers = {
        'Access-Control-Allow-Origin': '*'
    }
    return (json.dumps({"rec": top_n[:5], "explain": most_similar_user_id}), 200, headers)
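
# Example client call (a sketch, not part of the original source): the function above
# expects `user_id` as a query parameter or in a JSON body and returns the top five
# [app, score] pairs plus the most similar user as an explanation. The endpoint URL
# below is a placeholder for the deployed Cloud Function, not a real address.
#
#   import requests
#   resp = requests.get(
#       "https://REGION-PROJECT.cloudfunctions.net/recommendations",  # placeholder
#       params={"user_id": "example-user"},
#   )
#   print(resp.json())  # {"rec": [[package_name, score], ...], "explain": "..."}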