20220721数据库基础-谓词逻辑转写sql

20220721数据库原理
同事在学数据库,问我这个题,重新总结下我上学时候的经典套路
这个套路针对任意存在性和任意性判定的sql编写或集合运算,当时数据库老师疯狂改题目条件,导致sql毫无规律且从字面含义上难以正向理解运算逻辑。为了考试以不变应万变总结了这个套路,核心思想是不要纠结于最终sql的语法表达,而是通过逻辑化简来确保集合元素的正确性,化简后再转写成sql
主要运用集合论、关系代数基础以及离散数学的谓词逻辑,这些应该是我要做sql自动生成和优化的理论基础
题目经典的三张表如下
学生表s(sid,sname)
课程表c(cid,cname)
选课关系表sc(scid,sid,cid)
求至少学过lz同学学过所有课程的同学的学号
编写过程如下
首先做形式化表达和逻辑化简,原则:逻辑取反极可能少,尽可能使用存在性判断
谓词逻辑定义f(x,y) = 学生x选择了课程y
{x|对任意y,f(lz,y)->f(x,y)}
–逆否命题
{x|!(存在y,!(f(lz,y) -> f(x,y)))}
–蕴含等值 p->q === !p||q
{x|!(存在y,!(!f(lz,y) || f(x,y)))}
{x|对任意x,!(存在y,!(!f(lz,y) || f(x,y)))}
–等价替换
{x|!(存在y,(f(lz,y) && !f(x,y)))}
{x|对任意x,!(存在y,(f(lz,y) && !f(x,y)))}
–对任意x
select sc.sid from sc where 1=1
— f(x,y)
exists (
    select * from sc where sc.sid = x and sc.cid = y
)
— f(lz,y)
exists (
    select * from sc where sc.sid = ‘lz’ and sc.cid = y
)
— 存在y, (f(lz,y) && !f(x,y))
exists (– 存在y, 取y = sc1.cid  看作是定值,但要注意给表加别名
    select sc1.cid from sc sc1 where exists (
        select * from sc sc2 where sc2.sid = ‘lz’ and sc2.cid = sc1.cid –f(lz,y)
    )
    and not exists ( –!f(x,y)
        select * from sc sc3 where sc3.sid = x and sc3.cid = sc1.cid
    )
)
— 对任意x, 取x = sc0.sid
select * from sc sc0 where not exists (
    select sc1.cid from sc sc1 where exists (
        select * from sc sc2 where sc2.sid = ‘lz’ and sc2.cid = sc1.cid
    )
    and not exists (
        select * from sc sc3 where sc3.sid = sc0.sid and sc3.cid = sc1.cid
    )
)
— 从来没有人告诉我存在这样一个化简的过程,这个方法多年间也未曾见到有人和我讲过,以上中间过程我翻遍了互联网也没找到。这是我上学时候应对考试用的通解套路,工作后好久没这么详细的写过sql转写了,简单的业务都是直接写sql结果而快忘却了正确的通解解法
— 在exists内select sc1.cid和select *是一样的,判定结果只看行数,但语义完全不一样,如果是从谓词逻辑转来的不应该都是*
— 另外,从exists子句转为where子句是容易的,反过来理论上也一样,但网上的所谓教程根本不会讲这个是怎么来的
— 因子句存在永真式sc1.cid = sc1.cid 且 存在性判定可由where条件代替一次子查询
— 所以接下来才是你在网上轻易看到的答案格式,也就是课本或老师PPT给的的答案
exists (– y sc1.cid
    select sc1.cid from sc sc1 where sc1.sid = ‘lz’ and sc1.cid = sc1.cid –f(lz,y)
    and not exists ( — !f(x,y)
        select * from sc sc2 where sc2.sid = x and sc2.cid = sc1.cid
    )
)
–投影输出结果
select sc0.sid from sc sc0 where not exists (
     select sc1.cid from sc sc1 where sc1.sid = ‘lz’ and not exists (
        select * from sc sc2 where sc2.cid = sc1.cid and sc0.sid = sc2.sid
    )
)
–等价答案
— {x|对任意x,!(存在y,!(!f(lz,y) || f(x,y)))}
exists (– 存在y, 取y = sc1.cid  看作是定值,但要注意给表加别名
    select sc1.cid from sc sc1 where not(
        not exists (
            select * from sc sc2 where sc2.sid = ‘lz’ and sc2.cid = sc1.cid –f(lz,y)
        )
        or
        exists ( –f(x,y)
            select * from sc sc3 where sc3.sid = x and sc3.cid = sc1.cid
        )
    )
)
— 对任意x, 取x = sc0.sid
select * from sc sc0 where not (
    exists (– 存在y, 取y = sc1.cid  看作是定值,但要注意给表加别名
        select sc1.cid from sc sc1 where not (
            not exists (
                select * from sc sc2 where sc2.sid = ‘lz’ and sc2.cid = sc1.cid — f(lz,y)
            )
            or
            exists ( — f(x,y)
                select * from sc sc3 where sc3.sid = sc0.sid and sc3.cid = sc1.cid
            )
        )
    )
)
— 投影输出略
— 语义分析
sc0.sid = sc2.sid的子查询条件字面意义上是扫描了sc的每一行来分别做两次集合的存在性判定
第1行数据执行
id sid cid
sc0 1 lz 1
select sc0.sid from sc sc0 where not exists (
    select * from sc sc1 where sc1.sid = ‘lz’ and not exists (
        select * from sc sc2 where sc2.cid = 1 and ‘lz’ = sc2.sid
    )
)
select sc0.sid from sc sc0 where not exists (
    select * from sc sc1 where sc1.sid = ‘lz’ and not true
)
select sc0.sid from sc sc0 where not exists (
    empty
)
select sc0.sid from sc sc0 where true
1 lz 1输出
CREATE DATABASE test;
use test;
CREATE TABLE sc (
    id int(11) PRIMARY KEY NOT NULL AUTO_INCREMENT,
    sid varchar(255),
    cid varchar(255)
);
INSERT INTO sc(sid,cid) VALUES
('lz', '1'),
('lz', '2'),
('a', '1'),
('b', '2'),
('c', '1'),
('c', '2'),
('c', '3')
;
SELECT * FROM sc;
+----+------+------+
| id | sid  | cid  |
+----+------+------+
|  1 | lz   | 1    |
|  2 | lz   | 2    |
|  3 | a    | 1    |
|  4 | b    | 2    |
|  5 | c    | 1    |
|  6 | c    | 2    |
|  7 | c    | 3    |
+----+------+------+
7 rows in set (0.00 sec)
0 Comments
Leave a Reply