通过csv文件提供的数据库表内容: links.csv的格式: movies.csv格式,一个movie可以有多种风格(genres),通过|分隔: ratings.csv: 用户给movie打得分: tags.csv:movie的标签 练习一:列出四张表的总记录数:
-- Exercise 1: total row count of each of the four MovieLens tables.
-- Returns one row per table: a literal table label plus its COUNT.
-- UNION ALL (not UNION) is used because the four branches carry different
-- labels, so there is nothing to de-duplicate.
SELECT 'links' AS "table name", COUNT(1) AS "row count"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.LINKS"
UNION ALL
SELECT 'movies' AS "table name", COUNT(1) AS "row count"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES"
UNION ALL
SELECT 'ratings' AS "table name", COUNT(1) AS "row count"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
UNION ALL
SELECT 'tags' AS "table name", COUNT(1) AS "row count"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.TAGS";
执行结果: 练习2:计算总共9125部电影,一共包含多少艺术类别?
-- Exercise 2: list every distinct genre used by the movies.
-- "GENRES" holds several genres in one string, separated by '|'.
-- The block splits each distinct GENRES string into single genres, collects
-- them in an array, and returns the distinct values of that array.
DO BEGIN
    DECLARE genreArray NVARCHAR(255) ARRAY;
    DECLARE tmp NVARCHAR(255);
    DECLARE idx INTEGER;
    DECLARE sep NVARCHAR(1) := '|';
    DECLARE CURSOR cur FOR
        SELECT DISTINCT "GENRES"
        FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES";
    DECLARE genres NVARCHAR(255) := '';

    -- idx is always the next free (1-based) slot in genreArray.
    idx := 1;

    FOR cur_row AS cur() DO
        SELECT cur_row."GENRES" INTO genres FROM DUMMY;
        tmp := :genres;

        -- Peel off one genre per iteration while a separator remains.
        WHILE LOCATE(:tmp, :sep) > 0 DO
            genreArray[:idx] := SUBSTR_BEFORE(:tmp, :sep);
            tmp := SUBSTR_AFTER(:tmp, :sep);
            idx := :idx + 1;
        END WHILE;

        -- What is left is the last (or only) genre of this row.
        genreArray[:idx] := :tmp;
        -- Advance past the slot just written; without this the first genre
        -- of the next row overwrites the last genre of the current row.
        idx := :idx + 1;
    END FOR;

    genreList = UNNEST(:genreArray) AS ("GENRE");

    -- NOTE(review): any result count quoted in the surrounding text was
    -- produced before the slot-overwrite fix above; re-run to confirm it.
    SELECT "GENRE"
    FROM :genreList
    GROUP BY "GENRE";
END;
执行结果,总共包含18种: 练习3:计算每种艺术类别总共包含多少部电影:
-- Exercise 3: number of movies per genre.
-- "GENRES" holds several genres in one string, separated by '|'.
-- Every movie row is split into its single genres; each (movie, genre) pair
-- becomes one array entry, so counting entries per genre counts movies.
DO BEGIN
    DECLARE genreArray NVARCHAR(255) ARRAY;
    DECLARE tmp NVARCHAR(255);
    DECLARE idx INTEGER;
    DECLARE sep NVARCHAR(1) := '|';
    -- No DISTINCT here: with DISTINCT each genre combination would be counted
    -- once, giving "combinations per genre" instead of "movies per genre".
    DECLARE CURSOR cur FOR
        SELECT "GENRES"
        FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES";
    DECLARE genres NVARCHAR(255) := '';

    -- idx is always the next free (1-based) slot in genreArray.
    idx := 1;

    FOR cur_row AS cur() DO
        SELECT cur_row."GENRES" INTO genres FROM DUMMY;
        tmp := :genres;

        -- Peel off one genre per iteration while a separator remains.
        WHILE LOCATE(:tmp, :sep) > 0 DO
            genreArray[:idx] := SUBSTR_BEFORE(:tmp, :sep);
            tmp := SUBSTR_AFTER(:tmp, :sep);
            idx := :idx + 1;
        END WHILE;

        -- What is left is the last (or only) genre of this row.
        genreArray[:idx] := :tmp;
        -- Advance past the slot just written; without this the first genre
        -- of the next row overwrites the last genre of the current row and
        -- the per-genre counts come out too low.
        idx := :idx + 1;
    END FOR;

    genreList = UNNEST(:genreArray) AS ("GENRE");

    SELECT
        "GENRE",
        COUNT(1) AS "MOVIE_COUNT"
    FROM :genreList
    GROUP BY "GENRE";
END;
练习4:列出每部电影包含的风格数目:
-- Exercise 4: number of genres per movie.
-- The genre count is the number of '|' separators in "GENRES" plus one.
-- Rows are returned from the fewest genres to the most.
SELECT
    "MOVIEID",
    "TITLE",
    OCCURRENCES_REGEXPR('[|]' IN "GENRES") + 1 AS "GENRE_COUNT",
    "GENRES"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES"
ORDER BY "GENRE_COUNT";
练习5:罗列出每部电影的风格分布情况
-- Exercise 5: how many movies have 1, 2, 3, ... genres.
-- Step 1 derives the genre count of every movie ('|' separators plus one);
-- step 2 counts the movies that share each genre count.
WITH movie_genre_counts AS (
    SELECT
        OCCURRENCES_REGEXPR('[|]' IN "GENRES") + 1 AS "GENRE_COUNT"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES"
)
SELECT
    "GENRE_COUNT",
    COUNT(1)
FROM movie_genre_counts
GROUP BY "GENRE_COUNT"
ORDER BY "GENRE_COUNT";
比如至少拥有1个风格的电影,有2793部,2个风格的电影有3039部,等等。 练习6:计算movie的rating分布情况
-- Exercise 6: summary statistics of the ratings-per-movie distribution.
-- Step 1 counts the ratings of every movie. The outer query groups by that
-- count, so each distinct "RATING_COUNT" value is one input row for the
-- window aggregates; the empty OVER () spans all of those rows, and
-- SELECT DISTINCT collapses the identical output rows into a single one.
WITH ratings_per_movie AS (
    SELECT
        "MOVIEID",
        COUNT(1) AS "RATING_COUNT"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
    GROUP BY "MOVIEID"
)
SELECT DISTINCT
    MIN("RATING_COUNT")    OVER () AS "MIN",
    MAX("RATING_COUNT")    OVER () AS "MAX",
    AVG("RATING_COUNT")    OVER () AS "AVG",
    SUM("RATING_COUNT")    OVER () AS "SUM",
    MEDIAN("RATING_COUNT") OVER () AS "MEDIAN",
    STDDEV("RATING_COUNT") OVER () AS "STDDEV",
    COUNT(*)               OVER () AS "CATEGORY_COUNT"
FROM ratings_per_movie
GROUP BY "RATING_COUNT";
明细情况:
-- Detail view for exercise 6: for every ratings-per-movie value, the number
-- of movies that received exactly that many ratings, smallest value first.
WITH ratings_per_movie AS (
    SELECT
        "MOVIEID",
        COUNT(1) AS "RATING_COUNT"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
    GROUP BY "MOVIEID"
)
SELECT
    "RATING_COUNT",
    COUNT(1) AS "MOVIE_COUNT"
FROM ratings_per_movie
GROUP BY "RATING_COUNT"
ORDER BY "RATING_COUNT";
比如有397部电影的用户投票数为5票 练习7:统计用户投票情况
-- Exercise 7: distribution of how many ratings each user submitted.
-- Step 1 counts the ratings of every user; step 2 counts the users that share
-- each ratings-per-user value, listing the most active values first.
WITH ratings_per_user AS (
    SELECT
        "USERID",
        COUNT(1) AS "RATING_COUNT"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
    GROUP BY "USERID"
)
SELECT
    "RATING_COUNT",
    COUNT(1) AS "USER_COUNT"
FROM ratings_per_user
GROUP BY "RATING_COUNT"
ORDER BY "RATING_COUNT" DESC;
有一位用户投了2391票,一位用户投了1868票: 练习8:统计用户投票得分情况
-- Exercise 8: how many ratings were given for each rating value,
-- highest rating value first.
SELECT
    "RATING",
    COUNT(1) AS "RATING_COUNT"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
GROUP BY "RATING"
ORDER BY "RATING" DESC;
有15095份用户投票打的分数是5分。 要获取更多Jerry的原创文章,请关注公众号"汪子熙":
|