This shows you the differences between two versions of the page.
Both sides previous revision Previous revision | |||
new_rotation_sphinx [2017/04/07 16:24] 127.0.0.1 external edit |
new_rotation_sphinx [2019/01/06 10:34] (current) admin |
||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== Sphinx for Rotation\Tube sites ====== | ||
+ | Basically script uses mysql to search DB, but on large DB mysql too slow. + it doesnt use morphology. | ||
+ | |||
+ | Sphinx (sphinxsearch.com) - is an open source full text search server, designed from the ground up with performance, | ||
+ | |||
+ | |||
+ | Setup | ||
+ | |||
+ | * Ask admin to setup sphinx | ||
+ | * create folder scj/sphinx | ||
+ | * ask admin to add scj index into sphinx config. (See config example below ) | ||
+ | * Run indexer (or ask admin to do it) < | ||
+ | * In Rotation - Settings edit sphinx options. | ||
+ | |||
+ | That's it. | ||
+ | |||
+ | **Note** if you change DB (add\remove content) - please, run indexer every day to keep it's base up to date. | ||
+ | |||
+ | Add to cron something like (ask admin) | ||
+ | |||
+ | < | ||
+ | indexer --all --rotate | ||
+ | </ | ||
+ | |||
+ | |||
+ | ====== Sphinx configs TCMS ====== | ||
+ | |||
+ | ===== TCMS (version 2.X) ===== | ||
+ | |||
+ | < | ||
+ | |||
+ | |||
+ | searchd | ||
+ | { | ||
+ | listen | ||
+ | |||
+ | addition of mysql41 is the most important part. It's called SphinxQL | ||
+ | |||
+ | |||
+ | |||
+ | source your_name_source | ||
+ | { | ||
+ | type = mysql | ||
+ | |||
+ | sql_host = localhost | ||
+ | sql_user = | ||
+ | sql_pass = | ||
+ | sql_db = | ||
+ | sql_port = 3306 # optional, default is 3306 | ||
+ | |||
+ | sql_query_pre = SET NAMES utf8 | ||
+ | |||
+ | sql_query = SELECT gi.gallery_id, | ||
+ | (SELECT group_concat(tag_name) FROM rot_gal2tag g2t \ | ||
+ | LEFT JOIN rot_tags as t on t.tag_id = g2t.tag_id \ | ||
+ | WHERE g2t.gallery_id = gi.gallery_id) as tags, \ | ||
+ | (SELECT group_concat(tag_id) FROM rot_gal2tag g2t \ | ||
+ | WHERE g2t.gallery_id = gi.gallery_id) as tag_ids, \ | ||
+ | (SELECT group_concat(gss.group_id) FROM rot_gallery_stats1 as gss \ | ||
+ | WHERE gss.gallery_id = gi.gallery_id AND group_id != 0) as categories \ | ||
+ | FROM rot_gallery_info AS gi \ | ||
+ | JOIN rot_gallery_data1 AS gd ON gi.gallery_id = gd.gallery_id \ | ||
+ | JOIN rot_gallery_stats1 AS gs ON gs.gallery_id = gi.gallery_id \ | ||
+ | WHERE gallery_status = ' | ||
+ | and gs.best_thumb = ' | ||
+ | |||
+ | |||
+ | sql_attr_timestamp = date | ||
+ | sql_attr_uint | ||
+ | sql_attr_uint | ||
+ | sql_attr_float | ||
+ | sql_attr_uint | ||
+ | sql_attr_multi = uint categories from field; | ||
+ | sql_attr_multi = uint tag_ids from field; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | index your_name_index | ||
+ | { | ||
+ | source = your_name_source | ||
+ | path = /your_path | ||
+ | docinfo = extern | ||
+ | morphology | ||
+ | charset_type = utf-8 | ||
+ | |||
+ | } | ||
+ | |||
+ | </ | ||
+ | |||
+ | ===== 1.52 ===== | ||
+ | |||
+ | < | ||
+ | source scj | ||
+ | { | ||
+ | type = mysql | ||
+ | |||
+ | sql_host = localhost | ||
+ | sql_user = scj_mysql_login | ||
+ | sql_pass = scj_mysql_password | ||
+ | sql_db = scj_db | ||
+ | sql_port = 3306 # optional, default is 3306 | ||
+ | |||
+ | sql_query = SELECT id, date, tags, alt, description, | ||
+ | | ||
+ | // for version 49 | ||
+ | // | ||
+ | // | ||
+ | |||
+ | sql_attr_timestamp = date | ||
+ | sql_attr_uint | ||
+ | sql_attr_uint | ||
+ | sql_attr_uint | ||
+ | |||
+ | sql_query_info | ||
+ | } | ||
+ | |||
+ | |||
+ | index scj_index | ||
+ | { | ||
+ | source = scj | ||
+ | path = / | ||
+ | docinfo = extern | ||
+ | morphology | ||
+ | } | ||
+ | |||
+ | |||
+ | </ | ||
+ | ===== 1.51 ===== | ||
+ | |||
+ | < | ||
+ | source scj | ||
+ | { | ||
+ | type = mysql | ||
+ | |||
+ | sql_host = | ||
+ | sql_user = | ||
+ | sql_pass = | ||
+ | sql_db = | ||
+ | sql_port = 3306 # optional, default is 3306 | ||
+ | |||
+ | sql_query = SELECT id, crc32(g.gallery_md5) as gallery_md5_crc, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | sql_attr_timestamp = date | ||
+ | sql_attr_uint | ||
+ | sql_attr_uint | ||
+ | sql_attr_uint | ||
+ | sql_attr_float | ||
+ | sql_attr_uint | ||
+ | sql_attr_uint = gallery_md5_crc | ||
+ | sql_attr_multi = uint categories from field; | ||
+ | |||
+ | sql_query_info = SELECT * FROM rot_galleries WHERE id=$id | ||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
+ | You have to edit here: | ||
+ | |||
+ | scj_mysql_login | ||
+ | scj_mysql_password | ||
+ | scj_db | ||
+ | path_to_data | ||
+ | </ | ||
+ | ====== Sphinx Delta Config ====== | ||
+ | |||
+ | For Version 2.X | ||
+ | |||
+ | This part make sense only if you have a really big DB (1M+ , for example a demo version bigbase.smartcj.com has 12M+ galleries) | ||
+ | |||
+ | When Sphinx creates an index it takes a snapshot of a current state of a DB. So if something has been changed since then - it won't be reflected in index. And if you have a big DB it takes a lot of time to reindex it. | ||
+ | |||
+ | So the good idea is to just reindex those parts that have been changed. That's why you need so called delta index | ||
+ | |||
+ | To make use of it you need to add after | ||
+ | |||
+ | sql_query_pre = SET NAMES utf8 | ||
+ | |||
+ | a new line | ||
+ | |||
+ | sql_query_post_index = UPDATE rot_settings SET value = (SELECT MAX(gallery_id) FROM rot_gallery_info) WHERE name = ' | ||
+ | |||
+ | |||
+ | And add a new source | ||
+ | |||
+ | < | ||
+ | |||
+ | source delta : your_name_source | ||
+ | { | ||
+ | sql_query_pre = SET NAMES utf8 | ||
+ | |||
+ | sql_query = SELECT gi.gallery_id, | ||
+ | (SELECT group_concat(tag_name) FROM rot_gal2tag g2t \ | ||
+ | LEFT JOIN rot_tags as t on t.tag_id = g2t.tag_id \ | ||
+ | WHERE g2t.gallery_id = gi.gallery_id) as tags, \ | ||
+ | (SELECT group_concat(gss.group_id) FROM rot_gallery_stats1 as gss \ | ||
+ | WHERE gss.gallery_id = gi.gallery_id AND group_id != 0) as categories \ | ||
+ | FROM rot_gallery_info AS gi \ | ||
+ | JOIN rot_gallery_data1 AS gd ON gi.gallery_id = gd.gallery_id \ | ||
+ | JOIN rot_gallery_stats1 AS gs ON gs.gallery_id = gi.gallery_id \ | ||
+ | WHERE gi.gallery_id > ( SELECT value FROM rot_settings WHERE name = ' | ||
+ | AND gallery_status = ' | ||
+ | and gs.best_thumb = ' | ||
+ | | ||
+ | } | ||
+ | |||
+ | |||
+ | index delta : your_name_index | ||
+ | { | ||
+ | source = delta | ||
+ | path = / | ||
+ | } | ||
+ | |||
+ | |||
+ | </ | ||
+ | |||
+ | |||
+ | You can note that it has the same query with an addition of sphinx_max_gallery_id. | ||
+ | |||
+ | Now you have to add a new crontab task that will reindex new parts | ||
+ | |||
+ | indexer --rotate delta | ||
+ | |||
+ | Now you have to add ' | ||
+ | |||
+ | once a day you can marge main index and delta index | ||
+ | |||
+ | indexer --rotate --merge your_name_index delta | ||
+ | |||
+ | |||
+ | That's it. |