{"id":347,"date":"2020-10-01T07:00:33","date_gmt":"2020-09-30T23:00:33","guid":{"rendered":"https:\/\/www.mengqingzhong.com\/?p=347"},"modified":"2024-01-14T11:38:31","modified_gmt":"2024-01-14T03:38:31","slug":"postgresql-index-gin-7","status":"publish","type":"post","link":"https:\/\/www.mengqingzhong.com\/2020\/10\/01\/postgresql-index-gin-7\/","title":{"rendered":"PostgreSQL\u7d22\u5f15(7) \u2013 GIN"},"content":{"rendered":"

\n\u672c\u6587\u7ffb\u8bd1\u81eaEgor Rogov\u7684\u82f1\u6587\u535a\u5ba2<\/a>\uff0c\u4e14\u5df2\u5f81\u5f97Egor Rogov\u7684\u540c\u610f\u3002\u8bd1\u8005\u505a\u4e86\u5c11\u91cf\u4fee\u6539\u3002\n<\/p>\n

\n\u5ba1\u6821\uff1a\u7eaa\u6600\u7ea2\uff0c\u4e2d\u56fd\u4eba\u6c11\u5927\u5b66\u4fe1\u606f\u5b66\u9662\u5728\u8bfb\u535a\u58eb\u751f\u3002\n<\/p>\n

\u5728\u524d\u9762\u51e0\u7bc7\u6587\u7ae0\u4e2d\uff0c\u6211\u4eec\u8ba8\u8bba\u4e86\u901a\u7528\u7d22\u5f15\u5f15\u64ce\u3001AM\u7684\u63a5\u53e3\u548c\u56db\u79cdAM\uff08Hash\u7d22\u5f15\u3001B-tree\u7d22\u5f15\u3001GiST\u7d22\u5f15\u3001SP-GiST\u7d22\u5f15\uff09\u3002\u8fd9\u7bc7\u6587\u7ae0\u5c06\u4ecb\u7ecdGIN\u7d22\u5f15\u3002<\/p>\n

GIN\u662fGeneralized Inverted Index\u7684\u7f29\u5199\uff0c\u5c31\u662f\u6240\u8c13\u7684\u5012\u6392\u7d22\u5f15\u3002\u5b83\u6240\u64cd\u4f5c\u7684\u6570\u636e\u7c7b\u578b\u7684\u503c\u90fd\u4e0d\u662f\u539f\u5b50\u7684\uff0c\u800c\u662f\u7531\u591a\u4e2a\u5143\u7d20\u7ec4\u6210\u3002\u6211\u4eec\u628a\u8fd9\u4e9b\u7c7b\u578b\u79f0\u4e3a\u590d\u5408\u7c7b\u578b\u3002\u6211\u4eec\u4e0d\u662f\u5bf9\u539f\u59cb\u7684\u590d\u5408\u7c7b\u578b\u5efa\u7acb\u7d22\u5f15\uff0c\u800c\u662f\u5bf9\u590d\u5408\u7c7b\u578b\u4e2d\u7684\u5355\u4e2a\u5143\u7d20\u5efa\u7acb\u7d22\u5f15\u3002\u6bcf\u4e2a\u5143\u7d20\u6307\u5411\u590d\u5408\u7c7b\u578b\u7684\u503c\u3002<\/p>\n

\u5b83\u50cf\u4e00\u672c\u4e66\u7ed3\u5c3e\u90e8\u5206\u7684\u201c\u8bcd\u6c47\u51fa\u73b0\u5217\u8868\u201d\uff0c\u5217\u8868\u5217\u51fa\u6bcf\u4e2a\u5355\u8bcd\u5728\u4e66\u4e2d\u51fa\u73b0\u7684\u9875\u7801\u3002GIN\u7d22\u5f15\u5fc5\u987b\u4fdd\u8bc1\u53ef\u4ee5\u5feb\u901f\u68c0\u7d22\u88ab\u7d22\u5f15\u7684\u5143\u7d20\u3002\u56e0\u6b64\uff0c\u8fd9\u4e9b\u5143\u7d20\u88ab\u5b58\u5728\u7c7b\u4f3cB-tree\u7684\u7ed3\u6784\u4e2d\uff08\u4f7f\u7528\u4e86\u66f4\u7b80\u5355\u7684\u7ed3\u6784\uff09\u3002\u4e00\u4e2a\u6709\u5e8f\u7684\u96c6\u5408\u6307\u5411\u57fa\u8868\u7684\u884c\uff0c\u884c\u4e2d\u5305\u542b\u590d\u5408\u5143\u7d20\u7684\u503c\u3002\u6709\u5e8f\uff0c\u5bf9\u6570\u636e\u68c0\u7d22\u6765\u8bf4\u5e76\u4e0d\u91cd\u8981\uff0c\u4f46\u662f\u5bf9\u7d22\u5f15\u7684\u5185\u90e8\u5b58\u50a8\u65b9\u5f0f\u975e\u5e38\u91cd\u8981\u3002<\/p>\n

GIN\u7d22\u5f15\u6c38\u8fdc\u4e0d\u4f1a\u5220\u9664\u5b83\u7d22\u5f15\u7684\u5143\u7d20\u3002\u590d\u5408\u7c7b\u578b\u7684\u503c\u53ef\u80fd\u4f1a\u88ab\u5220\u9664\u3001\u66f4\u6539\uff0c\u4f46\u662f\u6240\u6709\u5143\u7d20\u7684\u96c6\u5408\u53ef\u80fd\u53d8\u5316\u5e76\u4e0d\u5927\u3002\u8fd9\u4e2a\u65b9\u6cd5\u5927\u5927\u7b80\u5316\u4e86GIN\u7d22\u5f15\u7684\u5e76\u53d1\u63a7\u5236\u3002<\/p>\n

\u5982\u679cTID\u5217\u8868\u5f88\u5c0f\uff0c\u53ef\u4ee5\u548c\u5143\u7d20\u653e\u5728\u4e00\u4e2a\u9875\u9762\u5185\uff08\u88ab\u79f0\u4e3aposting list\uff09\u3002\u4f46\u662f\uff0c\u5982\u679cTID\u5217\u8868\u5f88\u5927\uff0c\u5c31\u9700\u8981\u4f7f\u7528\u66f4\u9ad8\u6548\u7684\u6570\u636e\u7ed3\u6784 \u2014\u2014 B-tree\u3002\u8fd9\u9897B-tree\u5b58\u50a8\u5728\u5355\u72ec\u7684\u9875\u9762\u4e2d\uff0c\u88ab\u79f0\u4e3aposting tree\u3002<\/p>\n

\u6240\u4ee5\uff0cGIN\u7d22\u5f15\u5305\u542b\u4e00\u4e2aB-tree\uff0c\u5b83\u5b58\u50a8\u4e86\u5143\u7d20\u3002\u8fd8\u5305\u542b\u53e6\u5916\u4e00\u4e9bB-tree\u6216\u5217\u8868\uff0c\u5b83\u4eec\u7528\u6765\u5b58\u50a8TID\u5217\u8868\u3002<\/p>\n

\u548c\u4e4b\u524d\u8ba8\u8bba\u7684GiST\u3001SP-GiST\u4e00\u6837\uff0cGIN\u4e5f\u4e3a\u5f00\u53d1\u4eba\u5458\u63d0\u4f9b\u4e86\u4e00\u4e2a\u63a5\u53e3\uff0c\u7528\u6765\u652f\u6301\u7b26\u5408\u7c7b\u578b\u4e0a\u7684\u5404\u79cd\u64cd\u4f5c\u3002<\/p>\n

1. \u5168\u6587\u68c0\u7d22<\/h1>\n

GIN\u7d22\u5f15\u7684\u4e3b\u8981\u7528\u9014\u5c31\u662f\u52a0\u901f\u5168\u6587\u68c0\u7d22\u3002<\/p>\n

\u6211\u4eec\u5728\u4ecb\u7ecdGiST\u7d22\u5f15\u65f6\u5df2\u7ecf\u5bf9\u5168\u6587\u68c0\u7d22\u8fdb\u884c\u4e86\u7b80\u8981\u4ecb\u7ecd\uff0c\u4e0b\u9762\u76f4\u5165\u6b63\u9898\u3002\u8fd9\u79cd\u573a\u666f\u4e0b\uff0c\u590d\u5408\u7c7b\u578b\u5c31\u662f\u6587\u6863\uff0c\u5143\u7d20\u5c31\u662f\u6587\u6863\u4e2d\u7684\u8bcd\u7d20\u3002<\/p>\n

\u8003\u8651GiST\u7d22\u5f15\u4e2d\u7684\u4f8b\u5b50\uff1a<\/p>\n

create table ts(doc text, doc_tsv tsvector);\ninsert into ts(doc) values\n  ('Can a sheet slitter slit sheets?'), \n  ('How many sheets could a sheet slitter slit?'),\n  ('I slit a sheet, a sheet I slit.'),\n  ('Upon a slitted sheet I sit.'), \n  ('Whoever slit the sheets is a good sheet slitter.'), \n  ('I am a sheet slitter.'),\n  ('I slit sheets.'),\n  ('I am the sleekest sheet slitter that ever slit sheets.'),\n  ('She slits the sheet she sits on.');\nupdate ts set doc_tsv = to_tsvector(doc);\ncreate index on ts using gin(doc_tsv);\n<\/code><\/pre>\n

GIN\u7d22\u5f15\u6811\u7684\u7ed3\u6784\u53ef\u80fd\u5982\u4e0b\u6240\u793a\uff1a<\/p>\n

\"\"<\/a><\/p>\n

\u548c\u4e4b\u524d\u7684\u56fe\u7247\u4e0d\u540c\uff0cTID\u4f7f\u7528\u9ed1\u8272\u80cc\u666f\u7684\u503c\uff08\u9875\u9762\u53f7\uff0c\u9875\u9762\u5185\u504f\u79fb\uff09\u8868\u793a\u3002<\/p>\n

select ctid, left(doc,20), doc_tsv from ts;\n<\/code><\/pre>\n
  ctid |         left         |                         doc_tsv                         \n-------+----------------------+---------------------------------------------------------\n (0,1) | Can a sheet slitter  | 'sheet':3,6 'slit':5 'slitter':4\n (0,2) | How many sheets coul | 'could':4 'mani':2 'sheet':3,6 'slit':8 'slitter':7\n (0,3) | I slit a sheet, a sh | 'sheet':4,6 'slit':2,8\n (1,1) | Upon a slitted sheet | 'sheet':4 'sit':6 'slit':3 'upon':1\n (1,2) | Whoever slit the she | 'good':7 'sheet':4,8 'slit':2 'slitter':9 'whoever':1\n (1,3) | I am a sheet slitter | 'sheet':4 'slitter':5\n (2,1) | I slit sheets.       | 'sheet':3 'slit':2\n (2,2) | I am the sleekest sh | 'ever':8 'sheet':5,10 'sleekest':4 'slit':9 'slitter':6\n (2,3) | She slits the sheet  | 'sheet':4 'sit':6 'slit':2\n(9 rows)\n<\/code><\/pre>\n

\u5728\u4e0a\u9762\u7684\u4f8b\u5b50\u4e2d\uff0c\u9664\u4e86sheet\u3001slit\u548cslitter\u4e4b\u5916\uff0c\u5176\u5b83\u5143\u7d20\u6240\u6307\u5411\u7684TID\u53ef\u4ee5\u5b58\u653e\u5728\u5e38\u89c4\u7684\u9875\u9762\u4e2d\u3002\u5b83\u4eec\u4e09\u4e2a\u5728\u592a\u591a\u6587\u6863\u4e2d\u51fa\u73b0\u8fc7\uff0c\u56e0\u6b64\u5b83\u4eec\u6240\u6307\u5411\u7684TID\u5217\u8868\u90fd\u5b58\u50a8\u5728\u5355\u72ec\u7684B-tree\u4e2d\u3002<\/p>\n

\u987a\u4fbf\u63d0\u4e00\u53e5\uff0c\u6211\u4eec\u5982\u4f55\u77e5\u9053\u4e00\u4e2a\u8bcd\u7d20\u88ab\u591a\u5c11\u6587\u6863\u5305\u542b\u5462\uff1f\u5bf9\u5c0f\u8868\u6765\u8bf4\uff0c\u53ef\u4ee5\u76f4\u63a5\u7528\u4e0b\u9762\u7684\u67e5\u8be2\uff0c\u4f46\u662f\u5bf9\u5927\u8868\uff0c\u9700\u8981\u7528\u5176\u5b83\u66f4\u9ad8\u6548\u7684\u65b9\u6cd5\u3002<\/p>\n

select (unnest(doc_tsv)).lexeme, count(*) from ts\ngroup by 1 order by 2 desc;\n<\/code><\/pre>\n
  lexeme  | count \n----------+-------\n sheet    |     9\n slit     |     8\n slitter  |     5\n sit      |     2\n upon     |     1\n mani     |     1\n whoever  |     1\n sleekest |     1\n good     |     1\n could    |     1\n ever     |     1\n(11 rows)\n<\/code><\/pre>\n

\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u548c\u5e38\u89c4\u7684B-tree\u4e0d\u540c\uff0cGIN\u7d22\u5f15\u4e2d\u7684\u9875\u9762\u4f7f\u7528\u7684\u662f\u5355\u5411\u7684\u6307\u9488\uff0c\u800c\u4e0d\u662f\u53cc\u5411\u6307\u9488\u3002\u8fd9\u5df2\u7ecf\u8db3\u591f\u4e86\uff0c\u56e0\u4e3a\u53ea\u4f1a\u5bf9\u5b83\u8fdb\u884c\u524d\u5411\u904d\u5386\u3002<\/p>\n

1.1 \u67e5\u8be2\u793a\u4f8b<\/h2>\n

\u4e0b\u9762\u7684\u67e5\u8be2\u5982\u4f55\u6267\u884c\u5462\uff1f<\/p>\n

explain(costs off)\nselect doc from ts where doc_tsv @@ to_tsquery('many & slitter');\n<\/code><\/pre>\n
                             QUERY PLAN                              \n---------------------------------------------------------------------\n Bitmap Heap Scan on ts\n   Recheck Cond: (doc_tsv @@ to_tsquery('many & slitter'::text))\n   ->  Bitmap Index Scan on ts_doc_tsv_idx\n         Index Cond: (doc_tsv @@ to_tsquery('many & slitter'::text))\n(4 rows)\n<\/code><\/pre>\n

\u9996\u5148\u4ece\u67e5\u8be2\u4e2d\u62bd\u53d6\u51fa\u6bcf\u4e2a\u8bcd\u7d20\uff08\u67e5\u8be2key\uff09\uff1amani\u548cslitter\u3002\u8fd9\u7531\u7279\u6b8a\u7684API\u5b8c\u6210\uff0c\u5b83\u4f1a\u8003\u8651\u6570\u636e\u7c7b\u578b\u548c\u64cd\u4f5c\u7b26\u7684\u7b56\u7565\uff1a<\/p>\n

select amop.amopopr::regoperator, amop.amopstrategy\nfrom pg_opclass opc, pg_opfamily opf, pg_am am, pg_amop amop\nwhere opc.opcname = 'tsvector_ops'\nand opf.oid = opc.opcfamily\nand am.oid = opf.opfmethod\nand amop.amopfamily = opc.opcfamily\nand am.amname = 'gin'\nand amop.amoplefttype = opc.opcintype;\n<\/code><\/pre>\n
        amopopr        | amopstrategy \n-----------------------+--------------\n @@(tsvector,tsquery)  |            1  matching search query\n @@@(tsvector,tsquery) |            2  synonym for @@ (for backward compatibility)\n(2 rows)\n<\/code><\/pre>\n

\u5728\u5305\u542b\u8bcd\u7d20\u7684B-tree\u4e2d\uff0c\u627e\u51fa\u4e24\u4e2akey\u5bf9\u5e94\u7684TID\u5217\u8868\uff1a<\/p>\n

mani\uff1a(0,2)
\nslitter\uff1a(0,1), (0,2), (1,2), (1,3), (2,2)<\/p>\n

\"\"<\/a><\/p>\n

\u6700\u540e\uff0c\u5bf9\u6bcf\u4e2a\u627e\u51fa\u7684TID\uff0c\u4f9d\u6b21\u8c03\u7528\u4e00\u81f4\u6027\u51fd\u6570\u3002\u8fd9\u4e2a\u4e00\u81f4\u6027\u51fd\u6570\u53ef\u4ee5\u786e\u5b9aTID\u6307\u5411\u7684\u884c\u662f\u5426\u6ee1\u8db3\u67e5\u8be2\u6761\u4ef6\u3002\u56e0\u4e3a\u67e5\u8be2\u4e2d\u7684\u8bcd\u7d20\u7528and\u8fde\u63a5\uff0c\u56e0\u6b64\u8fd4\u56de\u7684\u884c\u4ec5\u4ec5\u662f(0,2)\u3002<\/p>\n

       |      |         |  consistency\n       |      |         |    function\n  TID  | mani | slitter | slit & slitter\n-------+------+---------+----------------\n (0,1) |    f |       T |              f \n (0,2) |    T |       T |              T\n (1,2) |    f |       T |              f\n (1,3) |    f |       T |              f\n (2,2) |    f |       T |              f\n<\/code><\/pre>\n

\u7ed3\u679c\u662f\uff1a<\/p>\n

select doc from ts where doc_tsv @@ to_tsquery('many & slitter');\n<\/code><\/pre>\n
                     doc                     \n---------------------------------------------\n How many sheets could a sheet slitter slit?\n(1 row)\n<\/code><\/pre>\n

\u5982\u679c\u628aGIN\u548cGiST\u8fdb\u884c\u5bf9\u6bd4\uff0cGIN\u5728\u5168\u6587\u68c0\u7d22\u65b9\u9762\u7684\u4f18\u52bf\u663e\u800c\u6613\u89c1\u3002\u4f46\u662f\uff0cGIN\u7d22\u5f15\u4e5f\u6709\u7f3a\u9677\u3002<\/p>\n

1.2 \u66f4\u65b0\u901f\u5ea6\u6162\u7684\u95ee\u9898<\/h2>\n

GIN\u7d22\u5f15\u7684\u5176\u4e2d\u4e00\u4e2a\u95ee\u9898\u662f\uff1a\u63d2\u5165\u548c\u66f4\u65b0\u5f88\u6162\u3002\u4e00\u7bc7\u6587\u7ae0\u901a\u5e38\u5305\u542b\u5f88\u591a\u9700\u8981\u88ab\u7d22\u5f15\u7684\u8bcd\u7d20\u3002\u56e0\u6b64\uff0c\u5373\u4f7f\u4ec5\u4ec5\u6dfb\u52a0\u6216\u66f4\u65b0\u4e00\u7bc7\u6587\u7ae0\uff0c\u4e5f\u9700\u8981\u5bf9\u7d22\u5f15\u8fdb\u884c\u5927\u91cf\u66f4\u65b0\u3002<\/p>\n

\u5728\u53e6\u4e00\u65b9\u9762\uff0c\u5982\u679c\u591a\u4e2a\u6587\u6863\u88ab\u540c\u65f6\u66f4\u65b0\uff0c\u5b83\u4eec\u7684\u67d0\u4e9b\u8bcd\u7d20\u53ef\u80fd\u76f8\u540c\uff0c\u5bf9\u5b83\u4eec\u8fdb\u884c\u6279\u91cf\u66f4\u65b0\u7684\u603b\u5de5\u4f5c\u91cf\u4f1a\u6bd4\u4f9d\u6b21\u66f4\u65b0\u6bcf\u4e2a\u6587\u6863\u5de5\u4f5c\u91cf\u7684\u548c\u8981\u5c0f\u5f88\u591a\u3002<\/p>\n

GIN\u7d22\u5f15\u6709\u4e00\u4e2a\u53c2\u6570 \u2014\u2014 fastupdate\uff0c\u53ef\u4ee5\u5728\u521b\u5efa\u7d22\u5f15\u65f6\u6307\u5b9a\uff1a<\/p>\n

create index on ts using gin(doc_tsv) with (fastupdate = true);\n<\/code><\/pre>\n

\u6253\u5f00\u8fd9\u4e2a\u53c2\u6570\u65f6\uff0c\u66f4\u65b0\u4f1a\u88ab\u5ef6\u8fdf\uff0c\u8fd9\u4e9b\u66f4\u65b0\u4f1a\u5148\u88ab\u5b58\u50a8\u5728\u4e00\u4e2a\u5355\u72ec\u7684\u65e0\u5e8f\u5217\u8868\u4e2d\uff08\u5728\u5355\u72ec\u7684\u8fde\u7eed\u9875\u9762\u4e2d\uff09\u3002\u5f53\u8fd9\u4e2a\u5217\u8868\u8db3\u591f\u5927\u65f6\uff0c\u6216\u8005\u505avacuum\u65f6\uff0c\u8fd9\u4e9b\u66f4\u65b0\u4f1a\u88ab\u540c\u65f6\u66f4\u65b0\u5230\u7d22\u5f15\u4e2d\u3002\u591a\u5927\u662f\u8db3\u591f\u5927\u5462\uff1f\u8fd9\u7531gin_pending_list_limit\u53c2\u6570\u63a7\u5236\uff0c\u8fd9\u4e2a\u53c2\u6570\u4e5f\u53ef\u4ee5\u5728\u521b\u5efa\u7d22\u5f15\u65f6\u6307\u5b9a\u3002<\/p>\n

\u4f46\u662f\u8fd9\u4e2a\u65b9\u6cd5\u4e5f\u6709\u7f3a\u70b9\uff1a\u9996\u5148\uff0c\u67e5\u8be2\u4f1a\u53d8\u6162\uff08\u9664\u4e86\u67e5\u627etree\uff0c\u8fd8\u8981\u67e5\u627e\u8fd9\u4e2a\u65e0\u5e8f\u5217\u8868\uff09\uff1b\u5176\u6b21\uff0c\u4e0b\u4e00\u6b21\u66f4\u65b0\u53ef\u80fd\u4f1a\u8017\u8d39\u5927\u91cf\u7684\u65f6\u95f4\uff08\u89e6\u53d1\u4e86\u6279\u91cf\u66f4\u65b0\uff09\u3002<\/p>\n

1.3 \u90e8\u5206\u5339\u914d<\/h2>\n

\u6211\u4eec\u53ef\u4ee5\u5728\u5168\u6587\u68c0\u7d22\u4e2d\u4f7f\u7528\u90e8\u5206\u5339\u914d\uff0c\u4f8b\u5982\uff1a<\/p>\n

select doc from ts where doc_tsv @@ to_tsquery('slit:*');\n<\/code><\/pre>\n
                          doc                           \n--------------------------------------------------------\n Can a sheet slitter slit sheets?\n How many sheets could a sheet slitter slit?\n I slit a sheet, a sheet I slit.\n Upon a slitted sheet I sit.\n Whoever slit the sheets is a good sheet slitter.\n I am a sheet slitter.\n I slit sheets.\n I am the sleekest sheet slitter that ever slit sheets.\n She slits the sheet she sits on.\n(9 rows)\n<\/code><\/pre>\n

\u8fd9\u4e2a\u67e5\u8be2\u5c06\u627e\u51fa\u6240\u6709\u4ee5slit\u5f00\u5934\u7684\u8bcd\u7d20\uff0c\u5728\u8fd9\u4e2a\u4f8b\u5b50\u4e2d\uff0c\u5c31\u662fslit\u548cslitter<\/p>\n

\u8fd9\u4e2a\u67e5\u8be2\uff0c\u5373\u4f7f\u6ca1\u6709\u7d22\u5f15\uff0c\u67e5\u8be2\u53ef\u4ee5\u6b63\u5e38\u5de5\u4f5c\uff0c\u4f46\u662fGIN\u7d22\u5f15\u53ef\u4ee5\u52a0\u901f\u67e5\u8be2\uff1a<\/p>\n

explain (costs off)\nselect doc from ts where doc_tsv @@ to_tsquery('slit:*');\n<\/code><\/pre>\n
                         QUERY PLAN                          \n-------------------------------------------------------------\n Bitmap Heap Scan on ts\n   Recheck Cond: (doc_tsv @@ to_tsquery('slit:*'::text))\n   ->  Bitmap Index Scan on ts_doc_tsv_idx\n         Index Cond: (doc_tsv @@ to_tsquery('slit:*'::text))\n(4 rows)\n<\/code><\/pre>\n

\u8fd9\u4e2a\u4f8b\u5b50\u4e2d\uff0c\u4f1a\u9996\u5148\u67e5\u627e\u6240\u6709\u4ee5slit\u4e3a\u524d\u7f00\u7684\u8bcd\u7d20\uff0c\u7136\u540e\u4f7f\u7528OR\u8fde\u63a5\u8d77\u6765\u3002<\/p>\n

1.4 \u8bcd\u7d20\u7684\u9891\u7e41\u7a0b\u5ea6<\/h2>\n

\u6211\u4eec\u7ee7\u7eed\u4f7f\u7528pgsql-hacker\u90ae\u4ef6\u5217\u8868\u7684\u771f\u5b9e\u6570\u636e\u96c6\u8fdb\u884c\u7814\u7a76\uff0c\u8fd9\u4e2a\u6570\u636e\u96c6\u5728GiST\u7d22\u5f15\u4e2d\u4ecb\u7ecd\u8fc7\u3002\u5b83\u5305\u542b356125\u6761\u6d88\u606f\uff0c\u6bcf\u6761\u6d88\u606f\u5305\u542b\u53d1\u9001\u65e5\u671f\u3001\u4e3b\u9898\u3001\u4f5c\u8005\u548c\u5168\u6587\u3002<\/p>\n

alter table mail_messages add column tsv tsvector;\nupdate mail_messages set tsv = to_tsvector(body_plain);\n<\/code><\/pre>\n
NOTICE:  word is too long to be indexed\nDETAIL:  Words longer than 2047 characters are ignored.\n...\nUPDATE 356125\n<\/code><\/pre>\n
create index on mail_messages using gin(tsv);\n<\/code><\/pre>\n

\u9996\u5148\uff0c\u627e\u51fa\u51fa\u73b0\u9891\u7387\u6bd4\u8f83\u9ad8\u7684\u8bcd\u7d20\u3002\u6b64\u65f6\u4e0d\u80fd\u4f7f\u7528unnest\uff0c\u56e0\u4e3a\u6570\u636e\u91cf\u592a\u5927\uff0c\u6211\u4eec\u9700\u8981\u4f7f\u7528ts_stat\u51fd\u6570\uff0c\u5b83\u63d0\u4f9b\u4e86\u5f88\u591a\u4fe1\u606f\uff0c\u5982\u5728\u591a\u5c11\u6587\u6863\u4e2d\u51fa\u73b0\u8fc7\u3001\u603b\u7684\u51fa\u73b0\u6b21\u6570\u7b49\u3002<\/p>\n

select word, ndoc\nfrom ts_stat('select tsv from mail_messages')\norder by ndoc desc limit 3;\n<\/code><\/pre>\n
 word  |  ndoc  \n-------+--------\n re    | 322141\n wrote | 231174\n use   | 176917\n(3 rows)\n<\/code><\/pre>\n

\u6211\u4eec\u4f7f\u7528wrote\u8fd9\u4e2a\u8bcd\u3002
\n\u7136\u540e\u518d\u9009\u4e00\u4e2a\u51fa\u73b0\u4e0d\u9891\u7e41\u7684\u8bcd\uff0ctattoo\uff1a<\/p>\n

select word, ndoc from ts_stat('select tsv from mail_messages') where word = 'tattoo';\n<\/code><\/pre>\n
  word  | ndoc \n--------+------\n tattoo |    2\n(1 row)\n<\/code><\/pre>\n

\u6709\u6587\u6863\u540c\u65f6\u5305\u542b\u8fd9\u4e24\u4e2a\u8bcd\u7d20\u5417\uff1f\u770b\u8d77\u6765\uff0c\u597d\u50cf\u6709\uff1a<\/p>\n

select count(*) from mail_messages where tsv @@ to_tsquery('wrote & tattoo');\n<\/code><\/pre>\n
 count \n-------\n     1\n(1 row)\n<\/code><\/pre>\n

\u8fd9\u4e2a\u67e5\u8be2\u662f\u5982\u4f55\u6267\u884c\u7684\u5462\uff1f\u5982\u679c\u6211\u4eec\u9996\u5148\u62ff\u5230\u8fd9\u4e24\u4e2a\u8bcd\u7d20\u5bf9\u5e94\u7684TID\u5217\u8868\uff0c\u8fd9\u4f1a\u975e\u5e38\u4f4e\u6548\uff0c\u56e0\u4e3a\u9700\u8981\u53bb\u5904\u740620\u4e07\u4e2aTID\uff0c\u6700\u7ec8\u53ea\u6709\u4e00\u4e2a\u7b26\u5408\u6761\u4ef6\u3002\u5e78\u8fd0\u7684\u662f\uff0c\u4f18\u5316\u5668\u501f\u52a9\u7edf\u8ba1\u4fe1\u606f\uff0c\u53ef\u4ee5\u77e5\u9053wrote\u51fa\u7684\u9891\u7387\u5f88\u9ad8\uff0ctatoo\u51fa\u73b0\u9891\u7387\u8f83\u4f4e\u3002\u56e0\u6b64\uff0cQuery\u9996\u5148\u67e5\u627e\u4e0d\u9891\u7e41\u7684\u8bcd\u7d20\uff0c\u62ff\u5230\u4e24\u4e2a\u6587\u6863\uff0c\u7136\u540e\u518d\u68c0\u67e5\u6587\u6863\u4e2d\u662f\u5426\u6709wrote\u3002\u8fd9\u79cd\u65b9\u6cd5\u4f1a\u9ad8\u6548\u5f88\u591a\uff1a<\/p>\n

\\timing on\nselect count(*) from mail_messages where tsv @@ to_tsquery('wrote & tattoo');\n<\/code><\/pre>\n
 count \n-------\n     1\n(1 row)\nTime: 0,959 ms\n<\/code><\/pre>\n

\u67e5\u627ewrote\u672c\u8eab\u8017\u65f6\u5c31\u4f1a\u5f88\u957f\uff1a<\/p>\n

select count(*) from mail_messages where tsv @@ to_tsquery('wrote');\n<\/code><\/pre>\n
 count  \n--------\n 231174\n(1 row)\nTime: 2875,543 ms (00:02,876)\n<\/code><\/pre>\n

\u4f18\u5316\u5668\u5f53\u7136\u4e0d\u53ea\u80fd\u5904\u7406\u4e24\u4e2a\u8bcd\u7d20\uff0c\u8fd8\u80fd\u5904\u7406\u66f4\u590d\u6742\u7684\u573a\u666f\u3002<\/p>\n

1.5 Limit\u5b50\u53e5<\/h2>\n

GIN\u7d22\u5f15\u7684\u4e00\u4e2a\u7279\u70b9\u662f\uff1a\u5b83\u53ea\u80fd\u8fd4\u56debitmap\uff0c\u4e0d\u80fd\u4f9d\u6b21\u8fd4\u56de\u6bcf\u4e2aTID\u3002\u6240\u4ee5\uff0c\u672c\u6587\u4e2d\u6240\u6709\u7684\u8ba1\u5212\u90fd\u4f7f\u7528\u4e86bitmap scan\u3002<\/p>\n

\u56e0\u6b64\uff0c\u4f7f\u7528LIMIT\u5b50\u53e5\u65f6\u6548\u7387\u4e0d\u9ad8\u3002\u6ce8\u610f\u8c13\u8bcd\u64cd\u4f5c\u7684\u4ee3\u4ef7\uff08LIMIT\u7b97\u5b50\u7684cost\u5b57\u6bb5\uff09\uff1a<\/p>\n

explain (costs off)\nselect * from mail_messages where tsv @@ to_tsquery('wrote') limit 1;\n<\/code><\/pre>\n
                                       QUERY PLAN\n-----------------------------------------------------------------------------------------\n Limit  (cost=1283.61..1285.13 rows=1)\n   ->  Bitmap Heap Scan on mail_messages  (cost=1283.61..209975.49 rows=137207)\n         Recheck Cond: (tsv @@ to_tsquery('wrote'::text))\n         ->  Bitmap Index Scan on mail_messages_tsv_idx  (cost=0.00..1249.30 rows=137207)\n               Index Cond: (tsv @@ to_tsquery('wrote'::text))\n(5 rows)\n<\/code><\/pre>\n

\u8fd9\u4e2a\u67e5\u8be2\u7684\u603b\u4ee3\u4ef7\u4e3a1285.13\uff0c\u53ea\u6bd4\u5efa\u7acb\u4f4d\u56fe\u7684\u4ee3\u4ef7\uff081249.30\uff09\u7a0d\u5fae\u5927\u4e00\u70b9\u3002<\/p>\n

\u56e0\u6b64\uff0cGIN\u7d22\u5f15\u6709\u4e00\u4e2a\u7279\u6b8a\u7684\u80fd\u529b\u6765\u9650\u5236\u67e5\u8be2\u7ed3\u679c\u7684\u6570\u91cf\u3002\u8fd9\u4e2a\u9608\u503c\u4f7f\u7528gin_fuzzy_search_limit\u53c2\u6570\u63a7\u5236\uff0c\u5b83\u7684\u9ed8\u8ba4\u503c\u4e3a0\uff08\u65e0\u9650\u5236\uff09\u3002\u6211\u4eec\u53ef\u4ee5\u624b\u5de5\u8bbe\u7f6e\u8fd9\u4e2a\u503c\uff1a<\/p>\n

set gin_fuzzy_search_limit = 1000;\nselect count(*) from mail_messages where tsv @@ to_tsquery('wrote');\n<\/code><\/pre>\n
 count \n-------\n  5746\n(1 row)\n<\/code><\/pre>\n
set gin_fuzzy_search_limit = 10000;\nselect count(*) from mail_messages where tsv @@ to_tsquery('wrote');\n<\/code><\/pre>\n
 count \n-------\n 14726\n(1 row)\n<\/code><\/pre>\n

\u53ef\u89c1\uff0c\u53c2\u6570\u4e0d\u540c\uff0c\u4f7f\u7528GIN\u65f6\u8fd4\u56de\u7684\u7ed3\u679c\u6570\u91cf\u4e0d\u540c\u3002\u8fd9\u4e2a\u9650\u5236\u4e0d\u5341\u5206\u7cbe\u786e\uff1a\u8fd4\u56de\u7684\u7ed3\u679c\u884c\u6570\u7a0d\u591a\u4e00\u4e9b\u3002\u8fd9\u5c31\u662ffuzzy\u7684\u610f\u601d\u3002<\/p>\n

1.6 \u538b\u7f29\u65b9\u5f0f<\/h2>\n

GIN\u7684\u5176\u4e2d\u4e00\u4e2a\u4f18\u52bf\u662f\uff0c\u5b83\u5177\u6709\u538b\u7f29\u7684\u7279\u6027\u3002\u9996\u5148\uff0c\u5982\u679c\u4e00\u4e2a\u8bcd\u7d20\u5728\u591a\u4e2a\u6587\u6863\u4e2d\u51fa\u73b0\uff08\u901a\u5e38\u662f\u8fd9\u6837\u7684\uff09\uff0c\u5b83\u53ea\u4f1a\u88ab\u5b58\u50a8\u4e00\u6b21\u3002\u5176\u6b21\uff0cTID\u5728\u7d22\u5f15\u4e2d\u6309\u987a\u5e8f\u5b58\u50a8\uff0c\u8fd9\u53ef\u4ee5\u4f7f\u5f97\u6211\u4eec\u4f7f\u7528\u4e00\u4e2a\u7b80\u5355\u7684\u538b\u7f29\u7b97\u6cd5\uff1a\u53ea\u5b58\u50a8\u6bcf\u4e2aTID\u4e0e\u524d\u4e00\u4e2aTID\u7684\u5dee\u5f02\uff1b\u8fd9\u901a\u5e38\u6765\u8bf4\u662f\u4e00\u4e2a\u6bd4\u8f83\u5c0f\u7684\u6570\uff0c\u5360\u7528\u7684\u7a7a\u95f4\u6570\u5c11\u4e8eTID\u76846\u5b57\u8282\u3002<\/p>\n

\u4e3a\u4e86\u76f4\u89c2\u611f\u53d7\u4e00\u4e0b\u4f53\u79ef\uff0c\u6211\u4eec\u5bf9\u4e0a\u9762\u7684\u90ae\u4ef6\u5217\u8868\u5efa\u7acbB-tree\u7d22\u5f15\u3002\u5f53\u7136\uff0c\u7edd\u5bf9\u7684\u516c\u5e73\u662f\u4e0d\u53ef\u80fd\u7684\uff1a
\nGIN\u628a\u7d22\u5f15\u5efa\u7acb\u5728\u5176\u5b83\u6570\u636e\u7c7b\u578b\u4e0a\uff08tsvector\uff0c\u800c\u4e0d\u662ftext\uff09\uff0c\u4f53\u79ef\u4f1a\u5c0f\u4e00\u4e9b\uff1b\u53e6\u4e00\u65b9\u9762\uff0cB-tree\u4f1a\u628a\u6d88\u606f\u622a\u65ad\u52302K\u4ee5\u5185\u3002<\/p>\n

create index mail_messages_btree on mail_messages(substring(body_plain for 2048));\ncreate index mail_messages_gist on mail_messages using gist(tsv);\nselect pg_size_pretty(pg_relation_size('mail_messages_tsv_idx')) as gin,\n             pg_size_pretty(pg_relation_size('mail_messages_gist')) as gist,\n             pg_size_pretty(pg_relation_size('mail_messages_btree')) as btree;\n<\/code><\/pre>\n
  gin   |  gist  | btree  \n--------+--------+--------\n 179 MB | 125 MB | 546 MB\n(1 row)\n<\/code><\/pre>\n

\u56e0\u4e3aGIN\u7d22\u5f15\u7684\u538b\u7f29\u7279\u6027\uff0c\u5f53\u8fc1\u79fbOracle\u7684\u4f4d\u56fe\u7d22\u5f15\u65f6\uff0c\u53ef\u4ee5\u4f7f\u7528GIN\u7d22\u5f15\uff08\u6b64\u5904\u4e0d\u8be6\u7ec6\u5c55\u5f00\uff0c\u53ef\u4ee5\u53c2\u89c1Lewis\u7684\u535a\u5ba2<\/a>\uff09\u3002Oracle\u7684\u4f4d\u56fe\u7528\u5728\u552f\u4e00\u503c\u8f83\u5c11\u7684\u5217\u4e0a\uff0c\u8fd9\u4e5f\u5f88\u9002\u5408GIN\u3002PG\u53ef\u4ee5\u4e3a\u4efb\u4f55\u7d22\u5f15\u751f\u6210\u4f4d\u56fe\uff0c\u5305\u62ecGIN\u3002<\/p>\n

1.7 \u4f7f\u7528GiST\u8fd8\u662fGIN\u5462\uff1f<\/h2>\n

\u5bf9\u5f88\u591a\u6570\u636e\u7c7b\u578b\u6765\u8bf4\uff0c\u64cd\u4f5c\u7b26\u7c7b\u53ef\u4ee5\u540c\u65f6\u9002\u7528\u4e8eGiST\u548cGIN\uff0c\u6211\u4eec\u5e94\u8be5\u5982\u4f55\u9009\u62e9\u5462\uff1f<\/p>\n

\u4f5c\u4e3a\u4e00\u4e2a\u539f\u5219\uff0cGIN\u7684\u7cbe\u5ea6\u548c\u901f\u5ea6\u66f4\u9ad8\u3002\u5982\u679c\u66f4\u65b0\u4e0d\u9891\u7e41\uff0c\u53c8\u5bf9\u67e5\u627e\u901f\u5ea6\u6709\u8981\u6c42\uff0cGIN\u662f\u5176\u4e2d\u4e00\u4e2a\u9009\u62e9\u3002<\/p>\n

\u53e6\u4e00\u65b9\u9762\uff0c\u5982\u679c\u6570\u636e\u66f4\u65b0\u9891\u7e41\uff0c\u66f4\u65b0GIN\u7d22\u5f15\u7684\u4ee3\u4ef7\u5c31\u4f1a\u5f88\u5927\uff0c\u8fd9\u79cd\u573a\u666f\u4e0b\uff0c\u6211\u4eec\u9700\u8981\u6bd4\u8f83\u4e8c\u8005\uff0c\u9009\u62e9\u66f4\u9002\u5408\u7684\u5e94\u7528\u573a\u666f\u7684\u90a3\u4e2a\u3002<\/p>\n

2. \u6570\u7ec4\u7c7b\u578b<\/h1>\n

\u53e6\u4e00\u4e2a\u4f7f\u7528GIN\u7d22\u5f15\u7684\u4f8b\u5b50\u662f\u6570\u7ec4\u3002\u8fd9\u79cd\u573a\u666f\u4e0b\uff0c\u5bf9\u6570\u7ec4\u5143\u7d20\u5efa\u7acb\u7d22\u5f15\uff0c\u53ef\u4ee5\u52a0\u901f\u6570\u7ec4\u4e0a\u7684\u4e00\u4e9b\u64cd\u4f5c\uff1a<\/p>\n

select amop.amopopr::regoperator, amop.amopstrategy\nfrom pg_opclass opc, pg_opfamily opf, pg_am am, pg_amop amop\nwhere opc.opcname = 'array_ops'\nand opf.oid = opc.opcfamily\nand am.oid = opf.opfmethod\nand amop.amopfamily = opc.opcfamily\nand am.amname = 'gin'\nand amop.amoplefttype = opc.opcintype;\n<\/code><\/pre>\n
        amopopr        | amopstrategy \n-----------------------+--------------\n &&(anyarray,anyarray) |            1  intersection\n @>(anyarray,anyarray) |            2  contains array\n <@(anyarray,anyarray) |            3  contained in array\n =(anyarray,anyarray)  |            4  equality\n(4 rows)\n<\/code><\/pre>\n

\u6211\u4eec\u7684demo\u6570\u636e\u5e93\u4e2d\u6709\u4e00\u4e2a\u5305\u542b\u822a\u73ed\u4fe1\u606f\u7684routes\u89c6\u56fe\u3002\u8fd9\u4e2a\u89c6\u56fe\u5305\u542b\u4e86\u4e00\u4e2adays_of_week\u5217 \u2014\u2014 \u4e00\u4e2a\u6570\u7ec4\uff0c\u63cf\u8ff0\u4e86\u54ea\u4e2a\u5de5\u4f5c\u65e5\u6709\u822a\u73ed\u3002\u4f8b\u5982\uff0c\u4eceVnukovo\u5230Gelendzhik\u6bcf\u5468\u4e8c\u3001\u5468\u56db\u3001\u5468\u65e5\u6709\u4e00\u4e2a\u822a\u73ed\u3002<\/p>\n

select departure_airport_name, arrival_airport_name, days_of_week\nfrom routes\nwhere flight_no = 'PG0049';\n<\/code><\/pre>\n
 departure_airport_name | arrival_airport_name | days_of_week \n------------------------+----------------------+--------------\n Vnukovo                | Gelendzhik            | {2,4,7}\n(1 row)\n<\/code><\/pre>\n

\u4e3a\u4e86\u5efa\u7acb\u7d22\u5f15\uff0c\u9996\u5148\u628a\u8fd9\u4e2a\u89c6\u56fe\u7269\u5316\u5230\u4e00\u5f20\u8868\u4e2d\u3002<\/p>\n

create table routes_t as select * from routes;\ncreate index on routes_t using gin(days_of_week);\n<\/code><\/pre>\n

\u73b0\u5728\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528\u7d22\u5f15\u67e5\u8be2\u5728\u5468\u4e8c\u3001\u5468\u56db\u3001\u5468\u65e5\u8d77\u98de\u7684\u822a\u73ed\uff1a<\/p>\n

explain (costs off) select * from routes_t where days_of_week = ARRAY[2,4,7];\n<\/code><\/pre>\n
                        QUERY PLAN                         \n-----------------------------------------------------------\n Bitmap Heap Scan on routes_t\n   Recheck Cond: (days_of_week = '{2,4,7}'::integer[])\n   ->  Bitmap Index Scan on routes_t_days_of_week_idx\n         Index Cond: (days_of_week = '{2,4,7}'::integer[])\n(4 rows)\n<\/code><\/pre>\n

\u5171\u67096\u4e2a\u822a\u73ed\uff1a<\/p>\n

select flight_no, departure_airport_name, arrival_airport_name, days_of_week from routes_t where days_of_week = ARRAY[2,4,7];\n<\/code><\/pre>\n
 flight_no | departure_airport_name | arrival_airport_name | days_of_week \n-----------+------------------------+----------------------+--------------\n PG0005    | Domodedovo             | Pskov                | {2,4,7}\n PG0049    | Vnukovo                | Gelendzhik           | {2,4,7}\n PG0113    | Naryan-Mar             | Domodedovo           | {2,4,7}\n PG0249    | Domodedovo             | Gelendzhik           | {2,4,7}\n PG0449    | Stavropol             | Vnukovo              | {2,4,7}\n PG0540    | Barnaul                | Vnukovo              | {2,4,7}\n(6 rows)\n<\/code><\/pre>\n

\u8fd9\u4e2a\u67e5\u8be2\u662f\u5982\u4f55\u6267\u884c\u7684\u5462\uff1f\u548c\u4e0a\u9762\u7684\u63cf\u8ff0\u76f8\u540c\uff1a<\/p>\n

\u9996\u5148\u4ece\u6761\u4ef6\u4e2d\u7684\u6570\u7ec4\u4e2d\uff0c\u627e\u51fa\u5143\u7d20\uff0c\u5373\uff1a2\u30014\u30017\u3002<\/p>\n

\u5728\u5305\u542b\u5143\u7d20\u7684\u6811\u4e2d\uff0c\u627e\u51fa\u6bcf\u4e2akey\uff0c\u5bf9\u6bcf\u4e2akey\u627e\u51fa\u4ed6\u4eec\u7684TID\u3002\u5bf9\u6240\u6709\u627e\u5230\u7684TID\uff0c\u4f7f\u7528\u4e00\u81f4\u6027\u51fd\u6570\u9009\u51fa\u7b26\u5408\u6761\u4ef6\u7684\u5143\u7d20\u3002\u5bf9\u7b49\u53f7\u6765\u8bf4\uff0c\u53ea\u6709\u540c\u65f6\u5305\u542b\u8fd9\u4e09\u4e2akey\uff0c\u5e76\u4e14\u4e0d\u5305\u542b\u5176\u5b83key\u624d\u884c\u3002\u7d22\u5f15\u626b\u63cf\u65e0\u6cd5\u4f7f\u7528\u201c\u4e0d\u7b49\u201d\u6761\u4ef6\u8fdb\u884c\u626b\u63cf\uff0c\u56e0\u6b64GIN\u7d22\u5f15\u9700\u8981\u8981\u6c42\u901a\u7528\u7d22\u5f15\u5f15\u64ce\u91cd\u65b0\u68c0\u67e5\u5b83\u8fd4\u56de\u7684TID\u662f\u5426\u6ee1\u8db3\u6761\u4ef6\u3002<\/p>\n

\u6709\u610f\u601d\u7684\u662f\uff0c\u6709\u4e00\u4e9b\u7b56\u7565\uff08\u6bd4\u5982\uff0c\u88ab\u6570\u7ec4\u5305\u542b\uff09\u65e0\u6cd5\u68c0\u67e5\u4efb\u4f55\u4e1c\u897f\uff0c\u901a\u7528\u641c\u7d22\u5f15\u64ce\u9700\u8981\u628a\u5b83\u8fd4\u56de\u7684\u6240\u6709TID\u90fd\u8fdb\u884c\u91cd\u65b0\u68c0\u67e5\u3002<\/p>\n

\u5982\u679c\u6211\u4eec\u8981\u627e\u5468\u4e8c\u3001\u5468\u56db\u3001\u5468\u65e5\u4eceMoscow\u8d77\u98de\u7684\u822a\u73ed\u5462\uff1fGIN\u7d22\u5f15\u53ea\u80fd\u652f\u6301\u4e00\u4e2a\u6761\u4ef6\uff0c\u53e6\u5916\u4e00\u4e2a\u6761\u4ef6\uff0c\u5c31\u79f0\u4e3a\u8fc7\u6ee4\u6761\u4ef6\uff08Filter\u5b57\u6bb5\uff09\u3002<\/p>\n

explain (costs off)\nselect * from routes_t where days_of_week = ARRAY[2,4,7] and departure_city = 'Moscow';\n<\/code><\/pre>\n
                        QUERY PLAN                         \n-----------------------------------------------------------\n Bitmap Heap Scan on routes_t\n   Recheck Cond: (days_of_week = '{2,4,7}'::integer[])\n   Filter: (departure_city = 'Moscow'::text)\n   ->  Bitmap Index Scan on routes_t_days_of_week_idx\n         Index Cond: (days_of_week = '{2,4,7}'::integer[])\n(5 rows)\n<\/code><\/pre>\n

\u4e0a\u9762\u7684\u4f8b\u5b50\u4e2d\uff0c\u53ea\u67096\u884c\u7ed3\u679c\uff0c\u4f7f\u7528\u8fd9\u79cd\u65b9\u6cd5\u53ef\u884c\uff0c\u5982\u679c\u6761\u4ef6\u7684\u9009\u62e9\u7387\u5f88\u9ad8\uff0c\u8fd4\u56de\u5f88\u591a\u7ed3\u679c\u65f6\uff0c\u5c31\u9700\u8981\u628a\u8fc7\u6ee4\u6761\u4ef6\u4e0b\u63a8\u5230\u626b\u63cf\u626b\u63cf\u4e2d\uff0c\u6210\u4e3a\u7d22\u5f15\u626b\u63cf\u7684\u626b\u63cf\u6761\u4ef6\u3002\u4f46\u662f\uff0c\u6211\u4eec\u4e0d\u80fd\u521b\u5efa\u8fd9\u6837\u7684\u7d22\u5f15\uff1a<\/p>\n

create index on routes_t using gin(days_of_week,departure_city);\n<\/code><\/pre>\n
ERROR:  data type text has no default operator class for access method \"gin\"\nHINT:  You must specify an operator class for the index or define a default operator class for the data type.\n<\/code><\/pre>\n

btree_gin\u53ef\u4ee5\u63d0\u4f9b\u8fd9\u79cd\u80fd\u529b\uff0c\u5b83\u589e\u52a0\u4e86\u4e00\u4e9b\u64cd\u4f5c\u7b26\u7c7b\u6a21\u62dfB-tree\u7684\u529f\u80fd\u3002<\/p>\n

create extension btree_gin;\ncreate index on routes_t using gin(days_of_week,departure_city);\nexplain (costs off)\nselect * from routes_t where days_of_week = ARRAY[2,4,7] and departure_city = 'Moscow';\n<\/code><\/pre>\n
                             QUERY PLAN\n---------------------------------------------------------------------\n Bitmap Heap Scan on routes_t\n   Recheck Cond: ((days_of_week = '{2,4,7}'::integer[]) AND\n                  (departure_city = 'Moscow'::text))\n   ->  Bitmap Index Scan on routes_t_days_of_week_departure_city_idx\n         Index Cond: ((days_of_week = '{2,4,7}'::integer[]) AND\n                      (departure_city = 'Moscow'::text))\n(4 rows)\n<\/code><\/pre>\n

3. JSONB\u7c7b\u578b<\/h1>\n

GIN\u672c\u8eab\u652f\u6301JSON\u7c7b\u578b\u3002\u4e3a\u4e86\u652f\u6301JSON\u503c\uff0c\u5b83\u5b9e\u73b0\u4e86\u4e00\u7cfb\u5217\u64cd\u4f5c\u7b26\u548c\u51fd\u6570\uff0c\u7528\u6765\u52a0\u901f\u7d22\u5f15\u7684\u67e5\u8be2\uff1a<\/p>\n

select opc.opcname, amop.amopopr::regoperator, amop.amopstrategy as str\nfrom pg_opclass opc, pg_opfamily opf, pg_am am, pg_amop amop\nwhere opc.opcname in ('jsonb_ops','jsonb_path_ops')\nand opf.oid = opc.opcfamily\nand am.oid = opf.opfmethod\nand amop.amopfamily = opc.opcfamily\nand am.amname = 'gin'\nand amop.amoplefttype = opc.opcintype;\n<\/code><\/pre>\n
    opcname     |     amopopr      | str\n----------------+------------------+-----\n jsonb_ops      | ?(jsonb,text)    |   9  top-level key exists\n jsonb_ops      | ?|(jsonb,text[]) |  10  some top-level key exists\n jsonb_ops      | ?&(jsonb,text[]) |  11  all top-level keys exist\n jsonb_ops      | @>(jsonb,jsonb)  |   7  JSON value is at top level\n jsonb_path_ops | @>(jsonb,jsonb)  |   7\n(5 rows)\n<\/code><\/pre>\n

\u53ef\u4ee5\u770b\u5230\uff0c\u4e24\u4e2a\u64cd\u4f5c\u7b26\u7c7b\u53ef\u4ee5\u4f7f\u7528: jsonb_ops\u548cjsonb_path_ops\u3002<\/p>\n

\u9ed8\u8ba4\u4f7f\u7528\u7b2c\u4e00\u4e2a\u64cd\u4f5c\u7b26\u7c7bjsonb_ops\u3002\u6240\u6709\u7684key\u3001\u503c\u3001\u6570\u7ec4\u5143\u7ec4\u90fd\u4f1a\u88ab\u5f53\u6210\u521d\u59cb\u6587\u6863\u7684\u5143\u7d20\u3002\u6bcf\u4e2a\u5143\u7d20\u4e0a\u589e\u52a0\u4e86\u4e00\u4e2a\u5c5e\u6027\uff0c\u8868\u793a\u8fd9\u4e2a\u5143\u7d20\u662f\u5426\u4e3a\u4e00\u4e2akey\uff08\u8fd9\u5728exist\u7b56\u7565\u4e2d\u6709\u7528\uff0c\u5b83\u533a\u5206key\u548c\u503c\uff09\u3002<\/p>\n

\u4f8b\u5982\uff0c\u6211\u4eec\u4f7f\u7528routes\u4e2d\u7684\u51e0\u884c\u8868\u793a\u6210JSON\u683c\u5f0f\uff1a<\/p>\n

create table routes_jsonb as\n  select to_jsonb(t) route \n  from (\n      select departure_airport_name, arrival_airport_name, days_of_week\n      from routes \n      order by flight_no limit 4\n  ) t;\nselect ctid, jsonb_pretty(route) from routes_jsonb;\n<\/code><\/pre>\n
 ctid  |                 jsonb_pretty                  \n-------+-------------------------------------------------\n (0,1) | {                                              +\n       |     \"days_of_week\": [                          +\n       |         1                                      +\n       |     ],                                         +\n       |     \"arrival_airport_name\": \"Surgut\",          +\n       |     \"departure_airport_name\": \"Ust-Ilimsk\"     +\n       | }\n (0,2) | {                                              +\n       |     \"days_of_week\": [                          +\n       |         2                                      +\n       |     ],                                         +\n       |     \"arrival_airport_name\": \"Ust-Ilimsk\",      +\n       |     \"departure_airport_name\": \"Surgut\"         +\n       | }\n (0,3) | {                                              +\n       |     \"days_of_week\": [                          +\n       |         1,                                     +\n       |         4                                      +\n       |     ],                                         +\n       |     \"arrival_airport_name\": \"Sochi\",           +\n       |     \"departure_airport_name\": \"Ivanovo-Yuzhnyi\"+\n       | }\n (0,4) | {                                              +\n       |     \"days_of_week\": [                          +\n       |         2,                                     +\n       |         5                                      +\n       |     ],                                         +\n       |     \"arrival_airport_name\": \"Ivanovo-Yuzhnyi\", +\n       |     \"departure_airport_name\": \"Sochi\"          +\n       | }\n(4 rows)\n<\/code><\/pre>\n
create index on routes_jsonb using gin(route);\n<\/code><\/pre>\n

\u7d22\u5f15\u53ef\u80fd\u50cf\u4e0b\u9762\u8fd9\u6837\uff1a<\/p>\n

\"\"<\/a><\/p>\n

\u73b0\u5728\uff0c\u4e0b\u9762\u8fd9\u6837\u7684\u67e5\u8be2\uff0c\u53ef\u4ee5\u4f7f\u7528\u7d22\u5f15\uff1a<\/p>\n

explain (costs off) \nselect jsonb_pretty(route) \nfrom routes_jsonb \nwhere route @> '{\"days_of_week\": [5]}';\n<\/code><\/pre>\n
                          QUERY PLAN                           \n---------------------------------------------------------------\n Bitmap Heap Scan on routes_jsonb\n   Recheck Cond: (route @> '{\"days_of_week\": [5]}'::jsonb)\n   ->  Bitmap Index Scan on routes_jsonb_route_idx\n         Index Cond: (route @> '{\"days_of_week\": [5]}'::jsonb)\n(4 rows)\n<\/code><\/pre>\n

\u4eceJSON\u6587\u6863\u7684\u6839\u8282\u70b9\u5f00\u59cb\uff0c@>\u64cd\u4f5c\u7b26\u68c0\u67e5\u7279\u5b9a\u7684route(“days_of_week”: [5])\u662f\u5426\u5b58\u5728\u3002\u67e5\u8be2\u4f1a\u8fd4\u56de\u4e00\u884c\uff1a<\/p>\n

select jsonb_pretty(route) from routes_jsonb where route @> '{\"days_of_week\": [5]}';\n<\/code><\/pre>\n
                 jsonb_pretty                 \n------------------------------------------------\n {                                             +\n     \"days_of_week\": [                         +\n         2,                                    +\n         5                                     +\n     ],                                        +\n     \"arrival_airport_name\": \"Ivanovo-Yuzhnyi\",+\n     \"departure_airport_name\": \"Sochi\"         +\n }\n(1 row)\n<\/code><\/pre>\n

\u67e5\u8be2\u6309\u4e0b\u9762\u7684\u65b9\u5f0f\u6267\u884c\uff1a<\/p>\n

\u67e5\u8be2\u4e2d\u7684\u5143\u7d20\u88ab\u63d0\u53d6\u51fa\u6765\uff1adays_of_week\u548c5\u3002\u4ece\u5305\u542b\u5143\u7d20\u7684\u6811\u4e2d\u627e\u51fa\u88ab\u62bd\u53d6\u7684key\uff0c\u627e\u51fa\u5b83\u4eec\u6240\u5bf9\u5e94\u7684TID\u5217\u8868\u3002\u5bf95\u6765\u8bf4\u662f\uff080\uff0c4\uff09\uff0c\u5bf9days_of_week\u6765\u8bf4\u662f(0,1), (0,2), (0,3), (0,4)\u3002<\/p>\n

\u5bf9\u6240\u6709\u627e\u5230\u7684TID\uff0c\u4f7f\u7528\u4e00\u81f4\u6027\u51fd\u6570\u627e\u51fa\u6240\u6709\u6ee1\u8db3\u67e5\u8be2\u64cd\u4f5c\u7b26\u7684\u884c\u3002\u5bf9@>\u64cd\u4f5c\u7b26\u6765\u8bf4\uff0c\u4e0d\u5305\u542b\u6240\u6709\u5143\u7d20\u7684\u6587\u6863\u80af\u5b9a\u4e0d\u7b26\u5408\u6761\u4ef6\uff0c\u56e0\u6b64\u503c\u7559\u4e0b\u4e86\uff080\uff0c4\uff09\u3002\u4f46\u662f\u6211\u4eec\u4ecd\u7136\u9700\u8981\u91cd\u65b0\u68c0\u67e5\u5269\u4e0b\u7684TID\uff0c\u56e0\u4e3a\u6211\u4eec\u8fd8\u4e0d\u77e5\u9053\u5b83\u4eec\u5728\u6587\u6863\u4e2d\u662f\u5426\u6309\u671f\u671b\u7684\u987a\u5e8f\u51fa\u73b0\u3002<\/p>\n

\u8bfb\u8005\u53ef\u4ee5\u9605\u8bfb\u6587\u6863\u4e86\u89e3\u5176\u5b83\u64cd\u4f5c\u7b26\u3002<\/p>\n

\u9664\u4e86\u5e38\u7528\u7684\u652f\u6301JSON\u7684\u64cd\u4f5c\u7b26\uff0cjsquery\u63d2\u4ef6\u4e5f\u5df2\u7ecf\u53ef\u7528\u5f88\u4e45\u4e86\uff0c\u5b83\u5b9a\u4e49\u4e86\u4e00\u79cd\u5177\u6709\u4e30\u5bcc\u80fd\u529b\u7684\u67e5\u8be2\u8bed\u8a00\uff08\u5f53\u7136\u5305\u542b\u5bf9GIN\u7d22\u5f15\u7684\u652f\u6301\uff09\u3002\u53e6\u5916\uff0c\u57282016\u5e74\uff0c\u65b0\u7684SQL\u6807\u51c6\u88ab\u53d1\u5e03\uff0c\u5b83\u5b9a\u4e49\u4e86\u81ea\u5df1\u7684\u64cd\u4f5c\u7b26\u548c\u67e5\u8be2\u8bed\u8a00SQL\/JSON path\u3002SQL\/JSON path\u5728PG12\u4e2d\u5df2\u7ecf\u652f\u6301\uff0c\u5176\u5b83\u90e8\u5206\u53ef\u80fd\u5728PG13\u79cd\u5b9e\u73b0\u3002<\/p>\n

4. \u5185\u90e8\u5b9e\u73b0<\/h1>\n

\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528pageinspect\u63d2\u4ef6\u67e5\u770bGIN\u7d22\u5f15\u7684\u5185\u90e8\u3002<\/p>\n

create extension pageinspect;\n<\/code><\/pre>\n

\u5143\u9875\u9762\u7684\u7edf\u8ba1\u4fe1\u606f\uff1a<\/p>\n

select * from gin_metapage_info(get_raw_page('mail_messages_tsv_idx',0));\n<\/code><\/pre>\n
-[ RECORD 1 ]----+-----------\npending_head     | 4294967295\npending_tail     | 4294967295\ntail_free_size   | 0\nn_pending_pages  | 0\nn_pending_tuples | 0\nn_total_pages    | 22968\nn_entry_pages    | 13751\nn_data_pages     | 9216\nn_entries        | 1423598\nversion          | 2\n<\/code><\/pre>\n

\u9875\u9762\u63d0\u4f9b\u4e86\u4e00\u4e2aspecial\u533a\u57df\uff0c\u5b58\u50a8\u4e86AM\u81ea\u5df1\u7684\u4fe1\u606f\u3002\u8fd9\u4e2a\u533a\u57df\u5bf9\u666e\u901a\u7a0b\u5e8f\uff08\u6bd4\u5982vacuum\uff09\u6765\u8bf4\u662fopaque\u3002gin_page_opaque_info\u5c55\u793a\u4e86GIN\u7d22\u5f15\u7684\u8fd9\u4e9b\u6570\u636e\u3002\u4f8b\u5982\uff0c\u6211\u4eec\u53ef\u4ee5\u77e5\u9053\u7d22\u5f15\u7684\u9875\u9762\uff1a<\/p>\n

select flags, count(*)\nfrom generate_series(1,22967) as g(id), -- n_total_pages\n     gin_page_opaque_info(get_raw_page('mail_messages_tsv_idx',g.id))\ngroup by flags;\n<\/code><\/pre>\n
         flags          | count \n------------------------+-------\n {meta}                 |     1  meta page\n {}                     |   133  internal page of element B-tree\n {leaf}                 | 13618  leaf page of element B-tree\n {data}                 |  1497  internal page of TID B-tree\n {data,leaf,compressed} |  7719  leaf page of TID B-tree\n(5 rows)\n<\/code><\/pre>\n

gin_leafpage_items\u51fd\u6570\u63d0\u4f9b\u4e86TID\u4e2d\u7684\u4fe1\u606f{data,leaf,compressed}:<\/p>\n

select * from gin_leafpage_items(get_raw_page('mail_messages_tsv_idx',2672));\n<\/code><\/pre>\n
-[ RECORD 1 ]---------------------------------------------------------------------\nfirst_tid | (239,44)\nnbytes    | 248\ntids      | {\"(239,44)\",\"(239,47)\",\"(239,48)\",\"(239,50)\",\"(239,52)\",\"(240,3)\",...\n-[ RECORD 2 ]---------------------------------------------------------------------\nfirst_tid | (247,40)\nnbytes    | 248\ntids      | {\"(247,40)\",\"(247,41)\",\"(247,44)\",\"(247,45)\",\"(247,46)\",\"(248,2)\",...\n...\n<\/code><\/pre>\n

\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0cTID\u6811\u7684\u53f6\u5b50\u9875\u9762\u4e2d\u5b9e\u9645\u5b58\u653e\u4e86\u5f88\u5c0f\u7684\u538b\u7f29\u5217\u8868\uff0c\u800c\u4e0d\u662f\u5355\u72ec\u7684\u6307\u9488\u3002<\/p>\n

5. \u5c5e\u6027<\/h1>\n

\u6211\u4eec\u770b\u4e00\u4e0bGIN AM\u7684\u5c5e\u6027\uff08\u67e5\u8be2\u89c1\u672c\u7cfb\u5217\u7b2c\u4e8c\u7bc7\u6587\u7ae0\uff09\uff1a<\/p>\n

 amname |     name      | pg_indexam_has_property \n--------+---------------+-------------------------\n gin    | can_order     | f\n gin    | can_unique    | f\n gin    | can_multi_col | t\n gin    | can_exclude   | f\n<\/code><\/pre>\n

\u6709\u8da3\u7684\u662f\uff0cGIN\u652f\u6301\u521b\u5efa\u591a\u5217\u7d22\u5f15\u3002\u7136\u800c\uff0c\u548c\u5e38\u89c4\u7684B-tree\u4e0d\u540c\uff0c\u591a\u5217\u7d22\u5f15\u4ecd\u7136\u5b58\u50a8\u5355\u4e2a\u7684\u5143\u7d20\uff0c\u800c\u4e0d\u662f\u5b58\u50a8\u590d\u5408\u7684key\uff0c\u6bcf\u4e2a\u5143\u7d20\u90fd\u6709\u4e00\u4e2a\u5217\u7f16\u53f7\u8868\u793a\u3002<\/p>\n

\u7d22\u5f15\u5c42\u9762\u7684\u5c5e\u6027\uff1a<\/p>\n

     name      | pg_index_has_property \n---------------+-----------------------\n clusterable   | f\n index_scan    | f\n bitmap_scan   | t\n backward_scan | f\n<\/code><\/pre>\n

\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u4e0d\u652f\u6301\u4f9d\u6b21\u8fd4\u56de\u6bcf\u4e2aTID\uff0c\u53ea\u652f\u6301\u4f4d\u56fe\u626b\u63cf\u3002\u4e0d\u652f\u6301\u53cd\u5411\u626b\u63cf\uff1a\u8fd9\u5bf9\u7d22\u5f15\u626b\u63cf\u5f88\u6709\u7528\uff0c\u4f46\u662f\u5bf9\u4f4d\u56fe\u626b\u63cf\u6ca1\u7528\u3002<\/p>\n

\u4e0b\u9762\u662f\u5217\u5c42\u9762\u7684\u5c5e\u6027\uff1a<\/p>\n

        name        | pg_index_column_has_property \n--------------------+------------------------------\n asc                | f\n desc               | f\n nulls_first        | f\n nulls_last         | f\n orderable          | f\n distance_orderable | f\n returnable         | f\n search_array       | f\n search_nulls       | f\n<\/code><\/pre>\n

\u6240\u6709\u5c5e\u6027\u7684\u90fd\u4e0d\u652f\u6301\uff0c<\/p>\n

6. \u5176\u5b83\u6570\u636e\u7c7b\u578b<\/h1>\n

\u5176\u5b83\u51e0\u4e2a\u63d2\u4ef6\uff0c\u53ef\u4ee5\u4e3a\u5176\u5b83\u6570\u636e\u7c7b\u578b\u63d0\u4f9b\u652f\u6301\u3002<\/p>\n

hstore\u5b9e\u73b0\u4e86K-V\u5b58\u50a8\u3002\u4e0d\u8fc7\u7531\u4e8e\u5df2\u7ecf\u6709\u4e86jsonb\uff0chstore\u7684\u4f7f\u7528\u573a\u666f\u5e76\u4e0d\u591a\u3002<\/p>\n

intarray\u6269\u5c55\u4e86int\u6570\u7ec4\u7c7b\u578b\u7684\u529f\u80fd\u3002\u5b83\u652f\u6301GiST\u7d22\u5f15\u548cGIN\u7d22\u5f15\uff08gin__int_ops\u64cd\u4f5c\u7b26\u7c7b\uff09\u3002<\/p>\n

\u4e0b\u9762\u4e24\u4e2a\u63d2\u4ef6\u5df2\u7ecf\u88ab\u63d0\u8fc7\u4e86\uff1a<\/p>\n

btree_gin\u4e3a\u5e38\u89c4\u7684\u6570\u636e\u7c7b\u578b\u589e\u52a0\u4e86GIN\u7d22\u5f15\u7684\u80fd\u529b\u3002<\/p>\n

jsquery\u5b9a\u4e49\u4e86\u4e00\u79cd\u65b0\u7684\u67e5\u8be2\u8bed\u8a00\u7528\u6765\u5904\u7406JSON\u67e5\u8be2\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"

\u672c\u6587\u4ecb\u7ecd\u4e86GIN\u7d22\u5f15\u5728\u5168\u6587\u68c0\u7d22\u3001\u6570\u7ec4\u7c7b\u578b\u3001jsonb\u7c7b\u578b\u4e0a\u7684\u5e94\u7528\u3002<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[2],"tags":[],"_links":{"self":[{"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/posts\/347"}],"collection":[{"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/comments?post=347"}],"version-history":[{"count":20,"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/posts\/347\/revisions"}],"predecessor-version":[{"id":474,"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/posts\/347\/revisions\/474"}],"wp:attachment":[{"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/media?parent=347"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/categories?post=347"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.mengqingzhong.com\/wp-json\/wp\/v2\/tags?post=347"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}