Oracle分析函数、多维函数简单应用

2020-05-22 00:00:00 函数 字段 统计 分组 华北地区

十年前写的,关于Oracle分析函数、多维函数简单应用,主要针对BI报表统计,不一定很全面,但对BI应用场景做了少许说明。

以下代码均经过测试,可直接运行。

代码示例

  1. --创建一张销售数量表,数据趋势是递增的

  2. CREATE TABLE ComputerSales AS  

  3. SELECT

  4.  120+TRUNC(rn/12)+ROUND(DBMS_RANDOM.VALUE(1,10)) SalesNumber

  5.   FROM

  6.   (

  7.     SELECT level,ROWNUM rn

  8.       FROM DUAL

  9.    CONNECT BY ROWNUM<=120

  10.   );


  11. --下面用于比较NULL值和非NULL值的统计,可以看出NULL值情况下的COUNT是存在问题的,所以建议数据库系统中好不要使用NULL值列

  12. SELECT

  13.   COUNT(*),

  14.   COUNT(a.SalesNumber),

  15.   COUNT(DISTINCT a.SalesNumber),

  16.   SUM(a.SalesNumber),

  17.   AVG(a.SalesNumber),

  18.   MAX(a.SalesNumber),

  19.   MIN(a.SalesNumber)

  20.   FROM ComputerSales A;

  21. DELETE FROM ComputerSales WHERE SalesNumber IS NULL;

  22. COMMIT;

  23. INSERT INTO ComputerSales VALUES(NULL);

  24. COMMIT;

  25. INSERT INTO ComputerSales VALUES(NULL);

  26. COMMIT;

  27. SELECT

  28.   COUNT(*),

  29.   COUNT(a.SalesNumber),

  30.   COUNT(DISTINCT a.SalesNumber),

  31.   SUM(a.SalesNumber),

  32.   AVG(a.SalesNumber),

  33.   MAX(a.SalesNumber),

  34.   MIN(a.SalesNumber)

  35.   FROM ComputerSales A;

  36. SELECT trunc(dbms_random.value(1,101)), 



  37. DELETE FROM ComputerSales WHERE SalesNumber IS NULL;

  38. COMMIT;

  39. --创建增加了日期字段的表

  40. CREATE TABLE ComputerSalesBAK AS  

  41. SELECT SalesNumber,TRUNC(SYSDATE)+MOD(A.DateSEQ-1,10) SalesDate

  42.   FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;

  43. DROP TABLE ComputerSales;

  44. RENAME ComputerSalesBAK TO ComputerSales;


  45. --下面是两种创建方式,构招Area列和日期列

  46. CREATE TABLE ComputerSalesBAK AS  

  47. SELECT SalesNumber,TRUNC(SYSDATE)+MOD(A.DateSEQ-1,24) SalesDate,

  48.        CASE WHEN TRUNC((DateSEQ-1)/24)=1 THEN '华南地区'

  49.             WHEN TRUNC((DateSEQ-1)/24)=2 THEN '华北地区'

  50.             WHEN TRUNC((DateSEQ-1)/24)=3 THEN '东北地区'

  51.             WHEN TRUNC((DateSEQ-1)/24)=4 THEN '华东地区'

  52.             ELSE '其他地区'

  53.        END

  54.   FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;

  55. DROP TABLE ComputerSales;

  56. RENAME ComputerSalesBAK TO ComputerSales;


  57. --该例可构造SalesDateArea的重复数据

  58. CREATE TABLE ComputerSalesBAK AS

  59. SELECT SalesNumber,

  60.        TRUNC(SYSDATE)+MOD(A.DateSEQ-1,10) SalesDate,

  61.        CASE WHEN AreaSEQ=1 THEN '华南地区'

  62.             WHEN AreaSEQ=2 THEN '华北地区'

  63.             WHEN AreaSEQ=3 THEN '东北地区'

  64.             WHEN AreaSEQ=4 THEN '华东地区'

  65.             ELSE '其他地区'

  66.        END

  67.   FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ,ROUND(dbms_random.VALUE(1,5)) AreaSEQ FROM ComputerSales) A;

  68. DROP TABLE ComputerSales;

  69. RENAME ComputerSalesBAK TO ComputerSales;

  70.  


  71. --移动平均值,累计求和,当前窗口平均值,当前窗口求和,以及窗口函数和排序函数的作用域

  72. SELECT

  73.   Area,SalesDate,SalesNumber,

  74.   MIN(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS min_Area_SalesDate,

  75.   MAX(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS max_Area_SalesDate,

  76.   AVG(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS avg_Area_SalesDate,  

  77.   SUM(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS sum_Area_SalesDate,  

  78.   COUNT(*) OVER (PARTITION BY Area ORDER BY SalesDate) AS count_Area,

  79.   MIN(SalesNumber) OVER (PARTITION BY Area) AS min_Area,

  80.   MAX(SalesNumber) OVER (PARTITION BY Area) AS max_Area,

  81.   AVG(SalesNumber) OVER (PARTITION BY Area) AS avg_Area,  

  82.   SUM(SalesNumber) OVER (PARTITION BY Area) AS sum_Area,  

  83.   COUNT(*) OVER (PARTITION BY Area) AS count_Area 

  84. FROM ComputerSales


  85. --观察RankDense_Rank,Row_number,Count的区别

  86. --Rank跳号,Dense_Rank不跳号,Row_numberCount按统计数计也跳号

  87. --如果PARTITION BYorder by 的字段是的话,则这四个函数没什么区别

  88. SELECT

  89.   Area,SalesDate,SalesNumber,

  90.   RANK() OVER (PARTITION BY Area order by SalesNumber) AS Rank_Area_SalesNumber,

  91.   DENSE_RANK() OVER (PARTITION BY Area order by SalesNumber) AS DenseRank_Area_SalesNumber,

  92.   ROW_NUMBER() OVER (PARTITION BY Area order by SalesNumber) AS Rownumber_Area_SalesNumber,

  93.   COUNT(*) OVER (PARTITION BY Area order by SalesNumber) AS CountAll_Area_SalesNumber,

  94.   COUNT(SalesNumber) OVER (PARTITION BY Area order by SalesNumber) AS Count_Area_SalesNumber

  95. FROM ComputerSales


  96. --观察LagLead的异同,以及Lag参数之间的异同

  97. --缺省情况下Lag取前一行的值,Lead取后一行的值

  98. --Laglead的个参数决定了取行的位置,第二个参数为取不到值时的缺省值

  99. SELECT

  100.   Area,SalesDate,SalesNumber,

  101.   LAG(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS Lag_Area_SalesNumber, 

  102.   LEAD(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS Lead_Area_SalesNumber,   

  103.   LAG(SalesNumber,1) OVER (PARTITION BY Area order by SalesDate) AS Lag1_Area_SalesNumber,

  104.   LAG(SalesNumber,2) OVER (PARTITION BY Area order by SalesDate) AS Lag2_Area_SalesNumber,

  105.   LEAD(SalesNumber,1) OVER (PARTITION BY Area order by SalesDate) AS Lead1_Area_SalesNumber,

  106.   LEAD(SalesNumber,2) OVER (PARTITION BY Area order by SalesDate) AS Lead2_Area_SalesNumber,

  107.   LAG(SalesNumber,1,) OVER (PARTITION BY Area order by SalesDate) AS Lag10_Area_SalesNumber,

  108.   LAG(SalesNumber,2,1) OVER (PARTITION BY Area order by SalesDate) AS Lag21_Area_SalesNumber,

  109.   LEAD(SalesNumber,1,) OVER (PARTITION BY Area order by SalesDate) AS Lead10_Area_SalesNumber,

  110.   LEAD(SalesNumber,2,1) OVER (PARTITION BY Area order by SalesDate) AS Lead21_Area_SalesNumber 

  111. FROM ComputerSales


  112. --观察First_ValueLast_Value的不同

  113. --如果取同一个同组中大值小值对应的某列,使用FIRST_VALUE,按照升降序排列即可

  114. --LAST_VALUE有些像两次分组所求的后一行

  115. SELECT

  116.   Area,SalesDate,SalesNumber,

  117.   FIRST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber) AS FirstValue_Area, 

  118.   FIRST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber DESC) AS FirstValue_Area_Desc,   

  119.   LAST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber) AS LastValue_Area,

  120.   LAST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber DESC) AS LastValue_Area_Desc

  121. FROM ComputerSales


  122. --与上面不同的是,KEEP需要和DENSE_RANK FIRST |DENSE_RANK LAST配合使用,且取的是相同Area中按SalesNumber排序所获得大或小的值,而上面只是取行或后一行

  123. SELECT Area,SalesDate,SalesNumber,

  124.   DENSE_RANK() OVER(PARTITION BY Area ORDER BY SalesNumber) DENSE_RANK,

  125.   MIN(SalesDate) KEEP (DENSE_RANK FIRST ORDER BY SalesNumber) OVER(PARTITION BY Area) min_first,

  126.   MIN(SalesDate) KEEP (DENSE_RANK LAST ORDER BY SalesNumber) OVER(PARTITION BY Area) min_last,

  127.   MAX(SalesDate) KEEP (DENSE_RANK FIRST ORDER BY SalesNumber) OVER(PARTITION BY Area) max_first,

  128.   MAX(SalesDate) KEEP (DENSE_RANK LAST ORDER BY SalesNumber) OVER(PARTITION BY Area) max_last

  129. FROM ComputerSales


  130. --CUME_DISTPERCENT_RANK差不多,都是累计计算比例,只不过计算基准不同,CUME_DIST更符合一般的做法

  131. --NTILE把数据平分为若干份,更适合用来计算四分位上的值

  132. --RATIO_TO_REPORT,则是求当前值在分区中的比例,且不能与ORDER BY 合起来使用

  133. --PERCENTILE_DISCPERCENTILE_CONT,则是给定的比例参数所对应的值,一般使用PERCENTILE_DISC即可

  134. SELECT Area,SalesDate,SalesNumber,

  135.   ROUND(CUME_DIST() OVER(PARTITION BY Area ORDER BY SalesNumber),2) cume_dist,

  136.   ROUND(PERCENT_RANK() OVER(PARTITION BY Area ORDER BY SalesNumber),2) PERCENT_RANK,

  137.   ROUND(RATIO_TO_REPORT(SalesNumber) OVER(PARTITION BY Area),2) RATIO_TO_REPORT,

  138.   NTILE(4) OVER(PARTITION BY Area ORDER BY SalesNumber) NTILE,

  139.   PERCENTILE_DISC(0.7) WITHIN GROUP (ORDER BY SalesNumber) OVER(PARTITION BY Area) PERCENTILE_DISC,

  140.   PERCENTILE_CONT(0.7) WITHIN GROUP (ORDER BY SalesNumber) OVER(PARTITION BY Area) PERCENTILE_CONT

  141. FROM ComputerSales


  142. --增加了一列叫销售额,可以进行相关数理统计

  143. CREATE TABLE ComputerSalesBAK AS  

  144. SELECT SalesNumber,

  145.        ROUND(SalesNumber*10+5*DBMS_RANDOM.VALUE(1,10)) SalesValue,

  146.        TRUNC(SYSDATE)+MOD(A.DateSEQ-1,24) SalesDate,

  147.        CASE WHEN TRUNC((DateSEQ-1)/24)=1 THEN '华南地区'

  148.             WHEN TRUNC((DateSEQ-1)/24)=2 THEN '华北地区'

  149.             WHEN TRUNC((DateSEQ-1)/24)=3 THEN '东北地区'

  150.             WHEN TRUNC((DateSEQ-1)/24)=4 THEN '华东地区'

  151.             ELSE '其他地区'

  152.        END Area

  153.   FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;

  154. DROP TABLE ComputerSales;

  155. RENAME ComputerSalesBAK TO ComputerSales;

  156. SELECT * FROM ComputerSales;


  157. --其他统计,对数理分析有研究的同学可以尝试一下其经济学含义

  158. SELECT Area,SalesDate,SalesValue,SalesNumber,

  159.   REGR_SLOPE(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "斜率",

  160.   REGR_INTERCEPT(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "截距",

  161.   REGR_R2(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "回归线决定系数",

  162.   REGR_AVGX(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "回归线自变量平均值",

  163.   REGR_AVGY(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "回归线应变量平均值", 

  164.   VAR_POP(SalesValue) OVER(PARTITION BY Area ORDER BY SalesDate) "VAR_POP_应变量", 

  165.   VAR_POP(SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "VAR_POP_自变量", 

  166.   COVAR_POP(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "COVAR_POP",       

  167.   REGR_SXX(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_SXX",  --REGR_COUNT(expr1, expr2) * VAR_POP(expr2) 

  168.   REGR_SYY(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_SXY",  --REGR_COUNT(expr1, expr2) * VAR_POP(expr1)

  169.   REGR_SXY(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_SXY",  --REGR_COUNT(expr1, expr2) * COVAR_POP(expr1, expr2)  

  170.   REGR_COUNT(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_COUNT"

  171. FROM ComputerSales


  172. --关于按日期进行环比的问题

  173. --同比则有麻烦,因为日期天数是不固定的

  174. --从ComputerSales随机删除几行再测

  175. SELECT AREA,SALESDATE,SALESNUMBER,

  176.   LAG(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS Lag_error,  --如遇断号,会导致数据不准

  177.   SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 1 PRECEDING AND 1 PRECEDING) yesterday, --昨天的值 

  178.   SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 6 PRECEDING AND 6 PRECEDING) lastweek, --上周数据 

  179.   SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 6 PRECEDING AND PRECEDING) last7_accu, --前7天累计,包括当天

  180.   SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 29 PRECEDING AND PRECEDING) last30_accu--前30天累计,包括当天

  181.   FROM ComputerSales

  182.  

  183. --再度增加一个product产品列,以方便进行CUBE函数演示

  184. CREATE TABLE ComputerSalesBAK AS  

  185. SELECT SalesNumber,

  186.        ROUND(SalesNumber*10+5*DBMS_RANDOM.VALUE(1,10)) SalesValue,

  187.        TRUNC(SYSDATE)+MOD(A.DateSEQ-1,24) SalesDate,

  188.        CASE WHEN TRUNC((DateSEQ-1)/24)=1 THEN '华南地区'

  189.             WHEN TRUNC((DateSEQ-1)/24)=2 THEN '华北地区'

  190.             WHEN TRUNC((DateSEQ-1)/24)=3 THEN '东北地区'

  191.             WHEN TRUNC((DateSEQ-1)/24)=4 THEN '华东地区'

  192.             ELSE '其他地区'

  193.        END Area,

  194.        CASE WHEN ROUND(DBMS_RANDOM.VALUE(1,3))=1 THEN '产品A'

  195.             WHEN ROUND(DBMS_RANDOM.VALUE(1,3))=2 THEN '产品B'

  196.             ELSE '产品C'

  197.        END Product      

  198.   FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;

  199. DROP TABLE ComputerSales;

  200. RENAME ComputerSalesBAK TO ComputerSales;

  201. SELECT * FROM ComputerSales;


  202. --传统的group by语法

  203. SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)

  204.   FROM ComputerSales

  205.  GROUP BY Product,Area,SalesDate

  206.  ORDER BY Product,Area,SalesDate

  207.  

  208. --ROLLUP (group的字段顺序)

  209. --会自动按Group字段分层统计,与日常报表较为相似

  210. SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)

  211.   FROM ComputerSales

  212.  GROUP BY ROLLUP(Product,Area,SalesDate)

  213.  ORDER BY Product,Area,SalesDate --加不加均可,已经自动按分组字段排序

  214.  

  215. --等价于

  216. SELECT * FROM

  217. (

  218. SELECT Product,Area,SalesDate,SUM(SalesNumber) SalesNumber,SUM(SalesValue) SalesValue --大级分组

  219.   FROM ComputerSales

  220.  GROUP BY Product,Area,SalesDate

  221.  UNION ALL

  222. SELECT Product,Area,NULL,SUM(SalesNumber),SUM(SalesValue) --按产品、地区分组

  223.   FROM ComputerSales

  224.  GROUP BY Product,Area,NULL

  225.  UNION ALL

  226. SELECT Product,NULL,NULL,SUM(SalesNumber),SUM(SalesValue) --按产品分组

  227.   FROM ComputerSales

  228.  GROUP BY Product,NULL,NULL

  229.  UNION ALL 

  230. SELECT NULL,NULL,NULL,SUM(SalesNumber),SUM(SalesValue)   --统计总和

  231.   FROM ComputerSales

  232.  GROUP BY NULL,NULL,NULL

  233. ) ORDER BY 1,2,3                                         --后再排序

  234.  

  235.  

  236. --CUBE (group的字段顺序),与OLAP比较相似,求得所有维度的交汇点

  237. --会自动按Group字段排列组合进行统计

  238. SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)

  239.   FROM ComputerSales

  240.  GROUP BY CUBE(Product,Area,SalesDate)

  241.  ORDER BY Product,Area,SalesDate --加不加均可,已经自动按分组字段排序

  242. --两则的区别

  243. --即ROLLUP C(3,1)即多了3

  244. --按照Product,Area,SalesDateProduct,AreaProductALL的顺序进行了统计

  245. --CUBE的统计层级则为2N次方,即全部的有序组合

  246. --按照Product,Area,SalesDate;Product,Area;Product,SalesDate;Product;Area,SalesDate;Area;SalesDate;ALL的顺序进行了统计

  247. --与ROLLUP的等价表达式,相当于ROLLUP的排列组合

  248. SELECT * FROM

  249. (

  250. SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue) --先按Product,Area,SalesDateROLLUP

  251.   FROM ComputerSales

  252.  GROUP BY ROLLUP(Product,Area,SalesDate)

  253. UNION

  254. SELECT Product,NULL,SalesDate,SUM(SalesNumber),SUM(SalesValue) --再按Product,SalesDateROLLUP

  255.   FROM ComputerSales

  256.  GROUP BY ROLLUP(Product,NULL,SalesDate)

  257. UNION

  258. SELECT NULL,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue) --再按Area,SalesDateROLLUP

  259.   FROM ComputerSales

  260.  GROUP BY ROLLUP(NULL,Area,SalesDate)

  261. UNION

  262. SELECT NULL,NULL,SalesDate,SUM(SalesNumber),SUM(SalesValue) --后按SalesDateROLLUP

  263.   FROM ComputerSales

  264.  GROUP BY ROLLUP(NULL,NULL,SalesDate)

  265.  )

  266.  ORDER BY 1,2,3


  267. --GROUPING SETS等同于按三列单独求统计,一般不常用

  268. SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)

  269.   FROM ComputerSales

  270.  GROUP BY GROUPING SETS(Product,Area,SalesDate)

  271.  ORDER BY Product,Area,SalesDate ;--加不加均可,已经自动按分组字段排序

  272. --等价于

  273. SELECT * FROM

  274. (

  275. SELECT Product,NULL Area,NULL SalesDate,SUM(SalesNumber),SUM(SalesValue) --按产品分组

  276.   FROM ComputerSales

  277.  GROUP BY Product,NULL,NULL

  278.  UNION ALL

  279. SELECT NULL,Area,NULL,SUM(SalesNumber),SUM(SalesValue) --按地区分组

  280.   FROM ComputerSales

  281.  GROUP BY NULL,Area,NULL

  282.  UNION ALL

  283. SELECT NULL,NULL,SalesDate,SUM(SalesNumber) SalesNumber,SUM(SalesValue) SalesValue --按日期分组

  284.   FROM ComputerSales

  285.  GROUP BY NULL,NULL,SalesDate

  286. ) ORDER BY 1,2,3    


  287. --GROUPING函数只接受一个参数,参数为数据表的一列。如果该列为空返回1,否则返回

  288. --并且它仅能与 GROUP BY,ROLLUP,CUBE,GROUPING SETS 一起使用。

  289. --稍微运行一下,就发现该函数只是为了做BI报表使用的,把统计行变为1,将来用作字符串替代

  290. SELECT GROUPING(Product), Product,GROUPING(Area),Area,GROUPING(SalesDate),SalesDate,SUM(SalesNumber),SUM(SalesValue)

  291.   FROM ComputerSales

  292.  GROUP BY ROLLUP(Product,Area,SalesDate)

  293.  ORDER BY Product,Area,SalesDate ;

  294. --BI标准报表格式

  295. SELECT

  296.   DECODE(ProductFlag,1,'产品汇总',Product),

  297.   DECODE(AreaFlag,1,'地区汇总',Area),

  298.   DECODE(SalesDateFlag,1,'日期汇总',TO_CHAR(SalesDate,'YYYY-MM-DD')),

  299.   SalesNumber,SalesValue

  300.   FROM

  301. (

  302. SELECT

  303.   GROUPING(Product) ProductFlag, Product,

  304.   GROUPING(Area) AreaFlag,Area,

  305.   GROUPING(SalesDate) SalesDateFlag,SalesDate,

  306.   SUM(SalesNumber) SalesNumber,SUM(SalesValue) SalesValue

  307.   FROM ComputerSales

  308.  GROUP BY ROLLUP(Product,Area,SalesDate)

  309.  ORDER BY Product,Area,SalesDate

  310. )


  311. --GROUPING_ID其实和GROUPING原理差不多,GROUPING参数为单值,且只返回1,1

  312. --GROUPING_ID,则返回按2的指数进行累计得到空值区域的值

  313. SELECT Product,Area,SalesDate,

  314.        GROUPING_ID(Product,Area,SalesDate) GROUPING421,

  315.        GROUPING_ID(Product,Area) GROUPPING21,

  316.        GROUPING_ID(Product) GROUPING1,

  317.        SUM(SalesNumber),

  318.        SUM(SalesValue)

  319.   FROM ComputerSales

  320.  GROUP BY ROLLUP(Product,Area,SalesDate)

  321.  ORDER BY Product,Area,SalesDate ;--加不加均可,已经自动按分组字段排序

  322.  

  323. --GROUP_ID函数可以区分重复分组结果,第1 次出现为,以后每次出现增1

  324. --GROUP_ID单独答应在SELECT 中出现意义不大,常在HAVING 中使用达到过滤重复统计的目的。

  325. SELECT Product,Area,SalesDate,GROUP_ID(),

  326.        SUM(SalesNumber),SUM(SalesValue)

  327.   FROM ComputerSales

  328.  GROUP BY CUBE(Product,Area),CUBE(Product,SalesDate)

  329. HAVING GROUP_ID()=

  330.  ORDER BY 1,2,3

  331. --例如该例子中分别按Product,AreaProduct,SalesDate会导致产品地区、产品时间的重复计算,导致报表的不清晰

  332. --我们用HAVING GROUP_ID()=把重复计算的行去掉就OK

  333. --一般情况下不建议报表程序过度分组,否则到后连自己都搞糊涂了

  334. --GROUP BY,ROLLUP,CUBE能组合使用,但SELECT中的分组字段必须出现在GROUP BY的相关栏位


  335. --MODEL:MODEL语句的关键字,必须。

  336. --DIMENSION BY:DIMENSION维度的意思,可以理解为数组的索引,必须。

  337. --MEASURES:指定作为数组的列

  338. --RULES:对数组进行各种操作的描述。

  339. --暂时还没搞明白如何应用,只是简单实现了一个求上月、前30天、前7天,前1天的例子

  340. SELECT AREA,PRODUCT,SALESDATE,SALESNUMBER,

  341.        AVG30DAY,AVG1MONTH, --近30天的平均值,近一个月的平均值

  342.        ACCU30DAY,ACCU1MONTH, --近30天的累加值,近一个月的累加值

  343.        SALESNUMBER1DAY,SALESNUMBER7DAY, --昨天的销售额,一周前的销售额

  344.        SALESNUMBER30DAY,SALESNUMBER1MONTH  --30天的销售额,上月同天的销售额

  345.   FROM ComputerSales

  346.  MODEL DIMENSION BY (AREA,PRODUCT,SALESDATE)

  347.  MEASURES (SALESNUMBER, AVG30DAY, AVG1MONTH, ACCU30DAY, ACCU1MONTH, SALESNUMBER1DAY, SALESNUMBER7DAY, SALESNUMBER30DAY, SALESNUMBER1MONTH)

  348.  RULES UPDATE

  349.  (AVG30DAY[ANY,ANY,ANY]=AVG(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-29 AND CV(SALESDATE)],

  350.   AVG1MONTH[ANY,ANY,ANY]=AVG(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN ADD_MONTHS(CV(SALESDATE),-1) AND CV(SALESDATE)],

  351.   ACCU30DAY[ANY,ANY,ANY]=SUM(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-30 AND CV(SALESDATE)],

  352.   ACCU1MONTH[ANY,ANY,ANY]=SUM(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN ADD_MONTHS(CV(SALESDATE),-1) AND CV(SALESDATE)],

  353.   SALESNUMBER1DAY[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-1 AND CV(SALESDATE)-1],

  354.   SALESNUMBER7DAY[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-7 AND CV(SALESDATE)-7],

  355.   SALESNUMBER30DAY[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-30 AND CV(SALESDATE)-30],

  356.   SALESNUMBER1MONTH[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-30 AND CV(SALESDATE)-30] 

  357.   )

  358. ORDER BY 1,2,3



  359. 关于按年月环比统计中可能出现的问题


  360. CREATE TABLE TEST (SALESMONTH VARCHAR(6),SALESNUMBER INT) ;

  361. INSERT INTO TEST VALUES('201002',2);

  362. INSERT INTO TEST VALUES('201004',4);

  363. INSERT INTO TEST VALUES('201007',7);

  364. INSERT INTO TEST VALUES('201008',8);

  365. INSERT INTO TEST VALUES('201010',10);


  366. SELECT SALESMONTH,SALESNUMBER,

  367. LAG(SalesNumber) OVER(order by SalesMONTH) AS Lag10_Area_SalesNumber,

  368. --如遇断号,会导致数据不准

  369. SUM(SalesNumber) OVER(ORDER BY TO_DATE(SalesMONTH||'01','YYYYMMDD') RANGE BETWEEN 1 PRECEDING AND 1 PRECEDING)

  370. FROM TEST


  371. 遇到一个问题,假如BI报表中的月份是字符串,而碰巧断月了,如何准确求得上个月的数据,理应为空

  372. 如果是天的话可以想办法规避掉,如果是字符串月没想好怎么处理


  373. newkid给了算法

  374. SELECT SALESMONTH,SALESNUMBER, 

  375.   MAX(SalesNumber) OVER(order by TO_DATE(SalesMONTH,'YYYYMM') RANGE BETWEEN 31 PRECEDING AND 1 PRECEDING )

  376. FROM TEST;

  377. 但我觉得结果很正确,但是不保险,而且有点迷糊

  378. 是把当前的月份转换成当月的天,并且向前推31天到前1

  379. 假如当前月是2月,向前推31天应该到去年12月份了,求的 MAX(SalesNumber) 未必有效

  380. 可实际结果是正确的,奇怪



相关文章