Difference between revisions of "Sphinx search engine Installation"
From Teknologisk videncenter
m |
m |
||
| Line 1: | Line 1: | ||
| − | Installer and date: [[User:Heth|Henrik Thomsen]] 07:32, 28 February 2009 (UTC) | + | *Installer and date: [[User:Heth|Henrik Thomsen]] 07:32, 28 February 2009 (UTC) |
| − | Following the instuctions at [[MW:Extension:SphinxSearch]] | + | *Following the instuctions at [[MW:Extension:SphinxSearch]] |
| − | Documentation [http://www.sphinxsearch.com/docs/current.html sphinx docs] | + | *Documentation [http://www.sphinxsearch.com/docs/current.html sphinx docs] |
| − | Really good article on configuring [http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx/ sphinx] | + | *Really good article on configuring [http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx/ sphinx] |
| − | Downloaded Sphinx from: [http://www.sphinxsearch.com/downloads/sphinx-0.9.9-rc1.tar.gz] | + | *Downloaded Sphinx from: [http://www.sphinxsearch.com/downloads/sphinx-0.9.9-rc1.tar.gz] |
| − | Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 om mars.tekkom.dk | + | *Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 om mars.tekkom.dk |
=== Compiling === | === Compiling === | ||
| Line 29: | Line 29: | ||
mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log | mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log | ||
</pre> | </pre> | ||
| + | |||
| + | * created directory /var/data/tekkom | ||
| + | * created directory /var/data/tekkomstemmed (English morpholgy) | ||
| + | == sphnix.conf == | ||
| + | |||
| + | */usr/local/etc/sphinx.conf | ||
| + | <pre> | ||
| + | # | ||
| + | # Minimal Sphinx configuration sample (clean, simple, functional) | ||
| + | # | ||
| + | |||
| + | source src1 | ||
| + | { | ||
| + | type = mysql | ||
| + | |||
| + | sql_host = localhost | ||
| + | sql_user = heth | ||
| + | sql_pass = l8heise | ||
| + | sql_db = wikidb | ||
| + | # sql_port = 3306 # optional, default is 3306 | ||
| + | ## --> HeTh inserted | ||
| + | sql_query_pre = | ||
| + | sql_query = \ | ||
| + | SELECT old_id, old_text\ | ||
| + | FROM text | ||
| + | sql_query_post = | ||
| + | sql_query_info = SELECT * FROM text WHERE old_id=$id | ||
| + | #<--HeTh | ||
| + | ## --> HeTh Commented | ||
| + | # sql_query = \ | ||
| + | # SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \ | ||
| + | # FROM documents | ||
| + | # | ||
| + | # sql_attr_uint = group_id | ||
| + | # sql_attr_timestamp = date_added | ||
| + | #<--HeTh | ||
| + | sql_query_info = SELECT * FROM documents WHERE id=$id | ||
| + | } | ||
| + | |||
| + | |||
| + | ############# --> HeTh Testing out morphology in English | ||
| + | index tekkom | ||
| + | { | ||
| + | source = src1 | ||
| + | path = /var/data/tekkom | ||
| + | docinfo = extern | ||
| + | mlock = 0 | ||
| + | morphology = none | ||
| + | min_word_len = 1 | ||
| + | charset_type = sbcs | ||
| + | html_strip = 0 | ||
| + | } | ||
| + | index tekkomstemmed : tekkom | ||
| + | { | ||
| + | path = /var/data/tekkomstemmed | ||
| + | morphology = stem_en | ||
| + | } | ||
| + | |||
| + | ############# <-- HeTh (Commented the simples index below) | ||
| + | #index tekkom | ||
| + | #{ | ||
| + | # source = src1 | ||
| + | # path = /var/data/tekkom | ||
| + | # docinfo = extern | ||
| + | # charset_type = sbcs | ||
| + | #} | ||
| + | |||
| + | |||
| + | indexer | ||
| + | { | ||
| + | # HeTh rasised to 64M from 32M 28/2-09 | ||
| + | mem_limit = 64M | ||
| + | } | ||
| + | |||
| + | |||
| + | searchd | ||
| + | { | ||
| + | listen = 3312 | ||
| + | log = /var/log/searchd.log | ||
| + | query_log = /var/log/query.log | ||
| + | read_timeout = 5 | ||
| + | max_children = 30 | ||
| + | pid_file = /var/log/searchd.pid | ||
| + | max_matches = 1000 | ||
| + | seamless_rotate = 1 | ||
| + | preopen_indexes = 0 | ||
| + | unlink_old = 1 | ||
| + | } | ||
| + | </pre> | ||
| + | == Indexing the search database == | ||
| + | <pre> | ||
| + | [root@mars /usr/local/etc]# indexer --config /usr/local/etc/sphinx.conf --all | ||
| + | Sphinx 0.9.9-rc1 (r1566) | ||
| + | Copyright (c) 2001-2008, Andrew Aksyonoff | ||
| + | |||
| + | using config file '/usr/local/etc/sphinx.conf'... | ||
| + | indexing index 'tekkom'... | ||
| + | collected 2259 docs, 6.1 MB | ||
| + | sorted 1.0 Mhits, 100.0% done | ||
| + | total 2259 docs, 6148478 bytes | ||
| + | total 0.826 sec, 7440030.11 bytes/sec, 2733.53 docs/sec | ||
| + | indexing index 'tekkomstemmed'... | ||
| + | collected 2259 docs, 6.1 MB | ||
| + | sorted 1.0 Mhits, 100.0% done | ||
| + | total 2259 docs, 6148478 bytes | ||
| + | total 1.559 sec, 3944462.15 bytes/sec, 1449.23 docs/sec | ||
| + | total 4 reads, 0.0 sec, 978.7 kb/read avg, 3.2 msec/read avg | ||
| + | total 28 writes, 0.0 sec, 344.0 kb/write avg, 1.1 msec/write avg | ||
| + | </pre> | ||
| + | == Testing the search database == | ||
| + | <pre> | ||
| + | time search "dhcp relay" | ||
| + | search -q "dhcp relay" | ||
| + | </pre> | ||
| + | Works fine :-), but apparently shows history documents as well. Lets se later when searching the wiki. | ||
| + | == Starting the search daemon manually == | ||
| + | <pre> | ||
| + | searchd --config /usr/local/etc/sphinx.conf | ||
| + | </pre> | ||
| + | == Implementing sphinx in the wiki == | ||
Revision as of 10:40, 28 February 2009
- Installer and date: Henrik Thomsen 07:32, 28 February 2009 (UTC)
- Following the instructions at MW:Extension:SphinxSearch
- Documentation sphinx docs
- Really good article on configuring sphinx
- Downloaded Sphinx from: [1]
- Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 on mars.tekkom.dk
Contents
Compiling
Sphinx installs by default in /usr/local and searches for the database. No options are really necessary.
- Executed ./configure, which ran all the tests successfully and generated the Makefiles.
- Executed make to generate the binaries. It showed minor warnings, but ran all tests successfully.
- Executed make install showing output below
Making install in src if test -d ../.svn; then svn info .. --xml | perl svnxrev.pl; fi; make install-am test -z "/usr/local/bin" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/bin" /usr/bin/install -c 'indexer' '/usr/local/bin/indexer' /usr/bin/install -c 'searchd' '/usr/local/bin/searchd' /usr/bin/install -c 'search' '/usr/local/bin/search' /usr/bin/install -c 'spelldump' '/usr/local/bin/spelldump' Making install in test test -z "/usr/local/etc" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/etc" /usr/bin/install -c -m 644 'sphinx.conf.dist' '/usr/local/etc/sphinx.conf.dist' /usr/bin/install -c -m 644 'sphinx-min.conf.dist' '/usr/local/etc/sphinx-min.conf.dist' /usr/bin/install -c -m 644 'example.sql' '/usr/local/etc/example.sql' make install-data-hook mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log
- created directory /var/data/tekkom
- created directory /var/data/tekkomstemmed (English morphology)
sphinx.conf
- /usr/local/etc/sphinx.conf
#
# Minimal Sphinx configuration sample (clean, simple, functional)
#
source src1
{
type = mysql
sql_host = localhost
sql_user = heth
sql_pass = l8heise
sql_db = wikidb
# sql_port = 3306 # optional, default is 3306
## --> HeTh inserted
sql_query_pre =
sql_query = \
SELECT old_id, old_text\
FROM text
sql_query_post =
sql_query_info = SELECT * FROM text WHERE old_id=$id
#<--HeTh
## --> HeTh Commented
# sql_query = \
# SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
# FROM documents
#
# sql_attr_uint = group_id
# sql_attr_timestamp = date_added
#<--HeTh
sql_query_info = SELECT * FROM documents WHERE id=$id
}
############# --> HeTh Testing out morphology in English
index tekkom
{
source = src1
path = /var/data/tekkom
docinfo = extern
mlock = 0
morphology = none
min_word_len = 1
charset_type = sbcs
html_strip = 0
}
index tekkomstemmed : tekkom
{
path = /var/data/tekkomstemmed
morphology = stem_en
}
############# <-- HeTh (Commented out the simple index below)
#index tekkom
#{
# source = src1
# path = /var/data/tekkom
# docinfo = extern
# charset_type = sbcs
#}
indexer
{
# HeTh raised to 64M from 32M 28/2-09
mem_limit = 64M
}
searchd
{
listen = 3312
log = /var/log/searchd.log
query_log = /var/log/query.log
read_timeout = 5
max_children = 30
pid_file = /var/log/searchd.pid
max_matches = 1000
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
}
Indexing the search database
[root@mars /usr/local/etc]# indexer --config /usr/local/etc/sphinx.conf --all
Sphinx 0.9.9-rc1 (r1566)
Copyright (c) 2001-2008, Andrew Aksyonoff

using config file '/usr/local/etc/sphinx.conf'...
indexing index 'tekkom'...
collected 2259 docs, 6.1 MB
sorted 1.0 Mhits, 100.0% done
total 2259 docs, 6148478 bytes
total 0.826 sec, 7440030.11 bytes/sec, 2733.53 docs/sec
indexing index 'tekkomstemmed'...
collected 2259 docs, 6.1 MB
sorted 1.0 Mhits, 100.0% done
total 2259 docs, 6148478 bytes
total 1.559 sec, 3944462.15 bytes/sec, 1449.23 docs/sec
total 4 reads, 0.0 sec, 978.7 kb/read avg, 3.2 msec/read avg
total 28 writes, 0.0 sec, 344.0 kb/write avg, 1.1 msec/write avg
Testing the search database
time search "dhcp relay"
search -q "dhcp relay"
Works fine :-), but apparently shows history documents as well. Let's see later when searching the wiki.
Starting the search daemon manually
searchd --config /usr/local/etc/sphinx.conf