mecabプラグイン
MySQL5.7からmecabプラグインが利用出来るようになるので、基本的な動作確認をしました。
現状では、eucjpms (ujis), cp932 (sjis), and utf8 (utf8mb4)のみ対応している状態です。

mecab-pug

Mecabについて
http://ja.wikipedia.org/wiki/MeCab

参考: 
InnoDB Full-Text: MeCab Parser
http://planet.mysql.com/entry/?id=5989321

初期設定
Plugin Directoryにモジュールや設定ファイルがあります。

[root@misc01 mecab]# ls -l /usr/local/mysql/lib/
合計 800168
-rw-r--r--. 1 root mysql  21934486  2月 27 00:04 libmysqlclient.a
lrwxrwxrwx. 1 root mysql        20  2月 27 00:12 libmysqlclient.so -> libmysqlclient.so.20
lrwxrwxrwx. 1 root mysql        24  2月 27 00:12 libmysqlclient.so.20 -> libmysqlclient.so.20.0.0
-rwxr-xr-x. 1 root mysql  10071218  2月 27 00:04 libmysqlclient.so.20.0.0
lrwxrwxrwx. 1 root mysql        16  2月 27 00:12 libmysqlclient_r.a -> libmysqlclient.a
lrwxrwxrwx. 1 root mysql        17  2月 27 00:12 libmysqlclient_r.so -> libmysqlclient.so
lrwxrwxrwx. 1 root mysql        20  2月 27 00:12 libmysqlclient_r.so.20 -> libmysqlclient.so.20
lrwxrwxrwx. 1 root mysql        24  2月 27 00:12 libmysqlclient_r.so.20.0.0 -> libmysqlclient.so.20.0.0
-rw-r--r--. 1 root mysql 347907574  2月 27 00:03 libmysqld-debug.a
-rw-r--r--. 1 root mysql 438661070  2月 27 00:12 libmysqld.a
-rw-r--r--. 1 root mysql     27154  2月 27 00:03 libmysqlservices.a
-rw-r--r--. 1 root mysql    754332  2月 26 23:58 libtcmalloc_minimal.so
drwxr-xr-x. 4 root mysql        26  3月 28 08:30 mecab
drwxr-xr-x. 3 root mysql      4096  3月 28 08:30 plugin
[root@misc01 mecab]# ls -l
合計 0
drwxr-xr-x. 5 root mysql 63  3月 28 08:30 dic
drwxr-xr-x. 2 root mysql 20  3月 29 07:29 etc
[root@misc01 mecab]# pwd
/usr/local/mysql/lib/mecab
[root@misc01 mecab]# ls -l dic/
合計 12
drwxr-xr-x. 2 root mysql 4096  3月 28 08:30 ipadic_euc-jp
drwxr-xr-x. 2 root mysql 4096  3月 28 08:30 ipadic_sjis
drwxr-xr-x. 2 root mysql 4096  3月 28 08:30 ipadic_utf-8
[root@misc01 mecab]# ls -l etc/
合計 4
-rw-r--r--. 1 root mysql 379  3月 29 07:29 mecabrc
[root@misc01 mecab]# 

基本インストールと設定
mecab設定ファイル

[root@misc01 etc]# cat mecabrc 
; Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
;
; Configuration file of MeCab
;
; We have three ipadics in /path/to/mysql/lib/mecab/dic:
; ipadic_euc-jp, ipadic_sjis, and ipadic_utf8.
; you can choose any of them or use your own one.
dicdir =  /path/to/mysql/lib/mecab/lib/mecab/dic/ipadic_euc-jp
[root@misc01 etc]# vi mecabrc 
[root@misc01 etc]# cat mecabrc 
; Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
;
; Configuration file of MeCab
;
; We have three ipadics in /path/to/mysql/lib/mecab/dic:
; ipadic_euc-jp, ipadic_sjis, and ipadic_utf8.
; you can choose any of them or use your own one.
; dicdir =  /path/to/mysql/lib/mecab/lib/mecab/dic/ipadic_euc-jp
dicdir = /usr/local/mysql/lib/mecab/dic/ipadic_utf-8
[root@misc01 etc]# 


[root@misc01 plugin]# /usr/local/mysql/bin/mysql -u root -p
Enter password: 
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 3
Server version: 5.7.6-m16-log MySQL Community Server (GPL)

Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

root@localhost [(none)]> show variables like 'plug%';
+---------------+------------------------------+
| Variable_name | Value                        |
+---------------+------------------------------+
| plugin_dir    | /usr/local/mysql/lib/plugin/ |
+---------------+------------------------------+
1 row in set (0.00 sec)

root@localhost [(none)]> INSTALL PLUGIN mecab SONAME 'libpluginmecab.so';
Query OK, 0 rows affected (0.29 sec)

root@localhost [(none)]> SHOW STATUS LIKE 'mecab_charset';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| mecab_charset | utf8  |
+---------------+-------+
1 row in set (0.00 sec)

root@localhost [(none)]> show variables like 'innodb_ft_min_token_size';
+--------------------------+-------+
| Variable_name            | Value |
+--------------------------+-------+
| innodb_ft_min_token_size | 2     |
+--------------------------+-------+
1 row in set (0.00 sec)


root@localhost [(none)]> select PLUGIN_NAME,PLUGIN_VERSION,PLUGIN_LICENSE from information_schema.plugins
    -> where PLUGIN_NAME = 'meCab';
+-------------+----------------+----------------+
| PLUGIN_NAME | PLUGIN_VERSION | PLUGIN_LICENSE |
+-------------+----------------+----------------+
| mecab       | 0.1            | GPL            |
+-------------+----------------+----------------+
1 row in set (0.00 sec)

root@localhost [(none)]> 

install

検証用データベース、テーブル、データの作成

root@localhost [(none)]> CREATE DATABASE `mecab` /*!40100 DEFAULT CHARACTER SET utf8mb4 */;
Query OK, 1 row affected (0.00 sec)

root@localhost [(none)]> use mecab
Database changed
root@localhost [mecab]> CREATE TABLE M_DEMO
    ->  (
    ->   FTS_M_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
    ->   title VARCHAR(100),
    ->   FULLTEXT INDEX mecab_idx(title) WITH PARSER mecab
    ->   ) Engine=InnoDB CHARACTER SET utf8;
Query OK, 0 rows affected (0.15 sec)

root@localhost [mecab]> INSERT INTO M_DEMO (title) VALUES ('東京都は日本の首都です'),('京都と大阪は日本の府です');
 
Query OK, 2 rows affected (0.69 sec)
Records: 2  Duplicates: 0  Warnings: 0

root@localhost [mecab]> INSERT INTO M_DEMO (title) VALUES ('mysql');
Query OK, 1 row affected (0.00 sec)

root@localhost [mecab]> INSERT INTO M_DEMO (title) VALUES ('MYSQL');
Query OK, 1 row affected (0.01 sec)

root@localhost [mecab]> INSERT INTO M_DEMO (title) VALUES ('MySQL');
Query OK, 1 row affected (0.00 sec)

root@localhost [mecab]> INSERT INTO M_DEMO (title) VALUES ('マイエスキューエル');
Query OK, 1 row affected (0.00 sec)

root@localhost [mecab]> INSERT INTO M_DEMO (title) VALUES ('マイエスキューエル');
Query OK, 1 row affected (0.00 sec)

root@localhost [mecab]> INSERT INTO M_DEMO (title) VALUES ('まいえすきゅーえる');
Query OK, 1 row affected (0.01 sec)

root@localhost [mecab]> 


root@localhost [mecab]> select * from M_DEMO;
+----------+--------------------------------------+
| FTS_M_ID | title                                |
+----------+--------------------------------------+
|        1 | 東京都は日本の首都です               |
|        2 | 京都と大阪は日本の府です             |
|        3 | mysql                                |
|        4 | MYSQL                                |
|        5 | MySQL                                |
|        6 | マイエスキューエル                   |
|        7 | マイエスキューエル                            |
|        8 | まいえすきゅーえる                   |
+----------+--------------------------------------+
8 rows in set (0.00 sec)

root@localhost [mecab]> SET GLOBAL innodb_ft_aux_table="mecab/M_DEMO";
Query OK, 0 rows affected (0.00 sec)

root@localhost [mecab]> SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE ORDER BY doc_id, position;
+-----------------------------+--------------+-------------+-----------+--------+----------+
| WORD                        | FIRST_DOC_ID | LAST_DOC_ID | DOC_COUNT | DOC_ID | POSITION |
+-----------------------------+--------------+-------------+-----------+--------+----------+
| 東京                        |            2 |           2 |         1 |      2 |        0 |
| 日本                        |            2 |           3 |         2 |      2 |       12 |
| 首都                        |            2 |           2 |         1 |      2 |       21 |
| です                        |            2 |           3 |         2 |      2 |       27 |
| 京都                        |            3 |           3 |         1 |      3 |        0 |
| 大阪                        |            3 |           3 |         1 |      3 |        9 |
| 日本                        |            2 |           3 |         2 |      3 |       18 |
| です                        |            2 |           3 |         2 |      3 |       30 |
| mysql                       |            4 |           6 |         3 |      4 |        0 |
| mysql                       |            4 |           6 |         3 |      5 |        0 |
| mysql                       |            4 |           6 |         3 |      6 |        0 |
| マイエスキューエル          |            7 |           7 |         1 |      7 |        0 |
| マイエスキューエル                   |            8 |           8 |         1 |      8 |        0 |
| いえ                        |            9 |           9 |         1 |      9 |        3 |
| すき                        |            9 |           9 |         1 |      9 |        9 |
| える                        |            9 |           9 |         1 |      9 |       21 |
+-----------------------------+--------------+-------------+-----------+--------+----------+
16 rows in set (0.00 sec)

root@localhost [mecab]> 

mecab

■ IN NATURAL LANGUAGE MODE(Text Searches)
Text searched for is converted to a union of search tokens.
  For example, ‘日本の首都’ is converted to ‘日本 の 首都’.

root@localhost [mecab]> SELECT * FROM M_DEMO WHERE MATCH(title) AGAINST('日本の首都' IN NATURAL LANGUAGE MODE);
+----------+--------------------------------------+
| FTS_M_ID | title                                |
+----------+--------------------------------------+
|        1 | 東京都は日本の首都です               |
|        2 | 京都と大阪は日本の府です             |
+----------+--------------------------------------+
2 rows in set (0.00 sec)

root@localhost [mecab]>  SELECT FTS_M_ID,title,MATCH (title) AGAINST('日本の首都' IN NATURAL LANGUAGE MODE) AS score FROM M_DEMO;
+----------+--------------------------------------+--------------------+
| FTS_M_ID | title                                | score              |
+----------+--------------------------------------+--------------------+
|        1 | 東京都は日本の首都です               |  1.178047776222229 |
|        2 | 京都と大阪は日本の府です             | 0.3624762296676636 |
|        3 | mysql                                |                  0 |
|        4 | MYSQL                                |                  0 |
|        5 | MySQL                                |                  0 |
|        6 | マイエスキューエル                   |                  0 |
|        7 | マイエスキューエル                            |                  0 |
|        8 | まいえすきゅーえる                   |                  0 |
+----------+--------------------------------------+--------------------+
8 rows in set (0.00 sec)

■ IN BOOLEAN MODE(Text Searches)
Text searched for is converted to a phrase search.
  For example, ‘日本の首都’ is converted to ‘”日本 の 首都”‘.

root@localhost [mecab]> SELECT * FROM M_DEMO WHERE MATCH(title) AGAINST('日本の首都' IN BOOLEAN MODE);
+----------+-----------------------------------+
| FTS_M_ID | title                             |
+----------+-----------------------------------+
|        1 | 東京都は日本の首都です            |
+----------+-----------------------------------+
1 row in set (0.00 sec)

root@localhost [mecab]> SELECT FTS_M_ID,title,MATCH (title) AGAINST('日本の首都' IN BOOLEAN MODE) AS score FROM M_DEMO;
+----------+--------------------------------------+-------------------+
| FTS_M_ID | title                                | score             |
+----------+--------------------------------------+-------------------+
|        1 | 東京都は日本の首都です               | 1.178047776222229 |
|        2 | 京都と大阪は日本の府です             |                 0 |
|        3 | mysql                                |                 0 |
|        4 | MYSQL                                |                 0 |
|        5 | MySQL                                |                 0 |
|        6 | マイエスキューエル                   |                 0 |
|        7 | マイエスキューエル                            |                 0 |
|        8 | まいえすきゅーえる                   |                 0 |
+----------+--------------------------------------+-------------------+
8 rows in set (0.00 sec)

mecab_cost

■ IN BOOLEAN MODE(Wildcard Searches)
No tokenization for the text of a wildcard search.
For example, for ‘日本の首都*’ we will search the prefix of ‘日本の首都’

root@localhost [mecab]> SELECT * FROM M_DEMO WHERE MATCH(title) AGAINST('日本*' IN BOOLEAN MODE);
+----------+--------------------------------------+
| FTS_M_ID | title                                |
+----------+--------------------------------------+
|        1 | 東京都は日本の首都です               |
|        2 | 京都と大阪は日本の府です             |
+----------+--------------------------------------+
2 rows in set (0.01 sec)

root@localhost [mecab]> SELECT FTS_M_ID,title,MATCH (title) AGAINST('日本*' IN BOOLEAN MODE) AS score FROM M_DEMO;
+----------+--------------------------------------+--------------------+
| FTS_M_ID | title                                | score              |
+----------+--------------------------------------+--------------------+
|        1 | 東京都は日本の首都です               | 0.3624762296676636 |
|        2 | 京都と大阪は日本の府です             | 0.3624762296676636 |
|        3 | mysql                                |                  0 |
|        4 | MYSQL                                |                  0 |
|        5 | MySQL                                |                  0 |
|        6 | マイエスキューエル                   |                  0 |
|        7 | マイエスキューエル                            |                  0 |
|        8 | まいえすきゅーえる                   |                  0 |
+----------+--------------------------------------+--------------------+
8 rows in set (0.00 sec)

root@localhost [mecab]> SELECT * FROM M_DEMO WHERE MATCH(title) AGAINST('日本の首都' IN BOOLEAN MODE);
+----------+-----------------------------------+
| FTS_M_ID | title                             |
+----------+-----------------------------------+
|        1 | 東京都は日本の首都です            |
+----------+-----------------------------------+
1 row in set (0.00 sec)

root@localhost [mecab]> SELECT FTS_M_ID,title,MATCH (title) AGAINST('日本の首都' IN BOOLEAN MODE) AS score FROM M_DEMO;
+----------+--------------------------------------+-------------------+
| FTS_M_ID | title                                | score             |
+----------+--------------------------------------+-------------------+
|        1 | 東京都は日本の首都です               | 1.178047776222229 |
|        2 | 京都と大阪は日本の府です             |                 0 |
|        3 | mysql                                |                 0 |
|        4 | MYSQL                                |                 0 |
|        5 | MySQL                                |                 0 |
|        6 | マイエスキューエル                   |                 0 |
|        7 | マイエスキューエル                            |                 0 |
|        8 | まいえすきゅーえる                   |                 0 |
+----------+--------------------------------------+-------------------+
8 rows in set (0.00 sec)


root@localhost [mecab]> SELECT * FROM M_DEMO WHERE MATCH(title) AGAINST('日本の首都*' IN BOOLEAN MODE);
Empty set (0.00 sec)

root@localhost [mecab]>  SELECT FTS_M_ID,title,MATCH (title) AGAINST('日本の首都*' IN BOOLEAN MODE) AS score FROM M_DEMO;
+----------+--------------------------------------+-------+
| FTS_M_ID | title                                | score |
+----------+--------------------------------------+-------+
|        1 | 東京都は日本の首都です               |     0 |
|        2 | 京都と大阪は日本の府です             |     0 |
|        3 | mysql                                |     0 |
|        4 | MYSQL                                |     0 |
|        5 | MySQL                                |     0 |
|        6 | マイエスキューエル                   |     0 |
|        7 | マイエスキューエル                            |     0 |
|        8 | まいえすきゅーえる                   |     0 |
+----------+--------------------------------------+-------+
8 rows in set (0.00 sec)

root@localhost [mecab]> 

■ IN BOOLEAN MODE(Phrase Searches)
A phrase search is tokenized by mecab. For example,
“日本の首都” is converted to “日本 の 首都”.

root@localhost [mecab]> SELECT * FROM M_DEMO WHERE MATCH(title) AGAINST('"日本の首都"' IN BOOLEAN MODE);
+----------+-----------------------------------+
| FTS_M_ID | title                             |
+----------+-----------------------------------+
|        1 | 東京都は日本の首都です            |
+----------+-----------------------------------+
1 row in set (0.00 sec)

root@localhost [mecab]>  SELECT FTS_M_ID,title,MATCH (title) AGAINST('"日本の首都"' IN BOOLEAN MODE) AS score FROM M_DEMO;
+----------+--------------------------------------+-------------------+
| FTS_M_ID | title                                | score             |
+----------+--------------------------------------+-------------------+
|        1 | 東京都は日本の首都です               | 1.178047776222229 |
|        2 | 京都と大阪は日本の府です             |                 0 |
|        3 | mysql                                |                 0 |
|        4 | MYSQL                                |                 0 |
|        5 | MySQL                                |                 0 |
|        6 | マイエスキューエル                   |                 0 |
|        7 | マイエスキューエル                            |                 0 |
|        8 | まいえすきゅーえる                   |                 0 |
+----------+--------------------------------------+-------------------+
8 rows in set (0.00 sec)

root@localhost [mecab]> 

Comments are closed.

Post Navigation