20220721数据库基础-谓词逻辑转写sql – HYL Studio

20220721数据库原理

同事在学数据库，问我这个题，重新总结下我上学时候的经典套路

这个套路针对任意存在性和任意性判定的sql编写或集合运算，当时数据库老师疯狂改题目条件，导致sql毫无规律且从字面含义上难以正向理解运算逻辑。为了考试以不变应万变总结了这个套路，核心思想是不要纠结于最终sql的语法表达，而是通过逻辑化简来确保集合元素的正确性，化简后再转写成sql

主要运用集合论、关系代数基础以及离散数学的谓词逻辑，这些应该是我要做sql自动生成和优化的理论基础

题目经典的三张表如下

学生表s(sid,sname)

课程表c(cid,cname)

选课关系表sc(scid,sid,cid)

求至少学过lz同学学过所有课程的同学的学号

编写过程如下

首先做形式化表达和逻辑化简,原则：逻辑取反极可能少，尽可能使用存在性判断

谓词逻辑定义f(x,y) = 学生x选择了课程y

{x|对任意y,f(lz,y)->f(x,y)}

–逆否命题

{x|!(存在y,!(f(lz,y) -> f(x,y)))}

–蕴含等值 p->q === !p||q

{x|!(存在y,!(!f(lz,y) || f(x,y)))}

{x|对任意x,!(存在y,!(!f(lz,y) || f(x,y)))}

–等价替换

{x|!(存在y,(f(lz,y) && !f(x,y)))}

{x|对任意x,!(存在y,(f(lz,y) && !f(x,y)))}

–对任意x

select sc.sid from sc where 1=1

— f(x,y)

exists (

select * from sc where sc.sid = x and sc.cid = y

)

— f(lz,y)

exists (

select * from sc where sc.sid = ‘lz’ and sc.cid = y

)

— 存在y, (f(lz,y) && !f(x,y))

exists (– 存在y, 取y = sc1.cid 看作是定值，但要注意给表加别名

select sc1.cid from sc sc1 where exists (

select * from sc sc2 where sc2.sid = ‘lz’ and sc2.cid = sc1.cid –f(lz,y)

)

and not exists ( –!f(x,y)

select * from sc sc3 where sc3.sid = x and sc3.cid = sc1.cid

)

— 对任意x, 取x = sc0.sid

select * from sc sc0 where not exists (

select sc1.cid from sc sc1 where exists (

select * from sc sc2 where sc2.sid = ‘lz’ and sc2.cid = sc1.cid

)

and not exists (

select * from sc sc3 where sc3.sid = sc0.sid and sc3.cid = sc1.cid

)

— 从来没有人告诉我存在这样一个化简的过程，这个方法多年间也未曾见到有人和我讲过，以上中间过程我翻遍了互联网也没找到。这是我上学时候应对考试用的通解套路，工作后好久没这么详细的写过sql转写了，简单的业务都是直接写sql结果而快忘却了正确的通解解法

— 在exists内select sc1.cid和select *是一样的，判定结果只看行数，但语义完全不一样，如果是从谓词逻辑转来的不应该都是*

— 另外，从exists子句转为where子句是容易的，反过来理论上也一样，但网上的所谓教程根本不会讲这个是怎么来的

— 因子句存在永真式sc1.cid = sc1.cid 且存在性判定可由where条件代替一次子查询

— 所以接下来才是你在网上轻易看到的答案格式，也就是课本或老师PPT给的的答案

exists (– y sc1.cid

select sc1.cid from sc sc1 where sc1.sid = ‘lz’ and sc1.cid = sc1.cid –f(lz,y)

and not exists ( — !f(x,y)

select * from sc sc2 where sc2.sid = x and sc2.cid = sc1.cid

)

–投影输出结果

select sc0.sid from sc sc0 where not exists (

select sc1.cid from sc sc1 where sc1.sid = ‘lz’ and not exists (

select * from sc sc2 where sc2.cid = sc1.cid and sc0.sid = sc2.sid

)

–等价答案

— {x|对任意x,!(存在y,!(!f(lz,y) || f(x,y)))}

exists (– 存在y, 取y = sc1.cid 看作是定值，但要注意给表加别名

select sc1.cid from sc sc1 where not(

not exists (

select * from sc sc2 where sc2.sid = ‘lz’ and sc2.cid = sc1.cid –f(lz,y)

)

exists ( –f(x,y)

select * from sc sc3 where sc3.sid = x and sc3.cid = sc1.cid

)

— 对任意x, 取x = sc0.sid

select * from sc sc0 where not (

exists (– 存在y, 取y = sc1.cid 看作是定值，但要注意给表加别名

select sc1.cid from sc sc1 where not (

not exists (

select * from sc sc2 where sc2.sid = ‘lz’ and sc2.cid = sc1.cid — f(lz,y)

)

exists ( — f(x,y)

select * from sc sc3 where sc3.sid = sc0.sid and sc3.cid = sc1.cid

)

— 投影输出略

— 语义分析

sc0.sid = sc2.sid的子查询条件字面意义上是扫描了sc的每一行来分别做两次集合的存在性判定

第1行数据执行

id sid cid

sc0 1 lz 1

select sc0.sid from sc sc0 where not exists (

select * from sc sc1 where sc1.sid = ‘lz’ and not exists (

select * from sc sc2 where sc2.cid = 1 and ‘lz’ = sc2.sid

)

select sc0.sid from sc sc0 where not exists (

select * from sc sc1 where sc1.sid = ‘lz’ and not true

)

select sc0.sid from sc sc0 where not exists (

empty

)

select sc0.sid from sc sc0 where true

1 lz 1输出


CREATE DATABASE test;
use test;
CREATE TABLE sc (
    id int(11) PRIMARY KEY NOT NULL AUTO_INCREMENT,
    sid varchar(255),
    cid varchar(255)
);
INSERT INTO sc(sid,cid) VALUES
('lz', '1'),
('lz', '2'),
('a', '1'),
('b', '2'),
('c', '1'),
('c', '2'),
('c', '3')
;
SELECT * FROM sc;
+----+------+------+
| id | sid  | cid  |
+----+------+------+
|  1 | lz   | 1    |
|  2 | lz   | 2    |
|  3 | a    | 1    |
|  4 | b    | 2    |
|  5 | c    | 1    |
|  6 | c    | 2    |
|  7 | c    | 3    |
+----+------+------+
7 rows in set (0.00 sec)