Difference between revisions of "Sphinx search engine Installation"
From Teknologisk videncenter
								
												
				m  | 
				m  | 
				||
| Line 1: | Line 1: | ||
| − | Installer and date: [[User:Heth|Henrik Thomsen]] 07:32, 28 February 2009 (UTC)  | + | *Installer and date: [[User:Heth|Henrik Thomsen]] 07:32, 28 February 2009 (UTC)  | 
| − | Following the instuctions at [[MW:Extension:SphinxSearch]]  | + | *Following the instuctions at [[MW:Extension:SphinxSearch]]  | 
| − | Documentation [http://www.sphinxsearch.com/docs/current.html sphinx docs]  | + | *Documentation [http://www.sphinxsearch.com/docs/current.html sphinx docs]  | 
| − | Really good article on configuring [http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx/ sphinx]  | + | *Really good article on configuring [http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx/ sphinx]  | 
| − | Downloaded Sphinx from: [http://www.sphinxsearch.com/downloads/sphinx-0.9.9-rc1.tar.gz]  | + | *Downloaded Sphinx from: [http://www.sphinxsearch.com/downloads/sphinx-0.9.9-rc1.tar.gz]  | 
| − | Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 om mars.tekkom.dk  | + | *Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 om mars.tekkom.dk  | 
=== Compiling ===  | === Compiling ===  | ||
| Line 29: | Line 29: | ||
mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log  | mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log  | ||
</pre>  | </pre>  | ||
| + | |||
| + | * created directory /var/data/tekkom  | ||
| + | * created directory /var/data/tekkomstemmed (English morphology)  | ||
| + | == sphinx.conf ==  | ||
| + | |||
| + | */usr/local/etc/sphinx.conf  | ||
| + | <pre>  | ||
| + | #  | ||
| + | # Minimal Sphinx configuration sample (clean, simple, functional)  | ||
| + | #  | ||
| + | |||
| + | source src1  | ||
| + | {  | ||
| + |         type                                    = mysql  | ||
| + | |||
| + |         sql_host                                = localhost  | ||
| + |         sql_user                                = heth  | ||
| + |         sql_pass                                = l8heise  | ||
| + |         sql_db                                  = wikidb  | ||
| + | #       sql_port                                = 3306  # optional, default is 3306  | ||
| + | ## --> HeTh inserted  | ||
| + |         sql_query_pre   =  | ||
| + |         sql_query       = \  | ||
| + |           SELECT old_id, old_text\  | ||
| + |           FROM text  | ||
| + |         sql_query_post  =  | ||
| + |         sql_query_info  = SELECT * FROM text WHERE old_id=$id  | ||
| + | #<--HeTh  | ||
| + | ## --> HeTh Commented  | ||
| + | #       sql_query                               = \  | ||
| + | #               SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \  | ||
| + | #               FROM documents  | ||
| + | #  | ||
| + | #       sql_attr_uint                   = group_id  | ||
| + | #       sql_attr_timestamp              = date_added  | ||
| + | #<--HeTh  | ||
| + |         sql_query_info                  = SELECT * FROM documents WHERE id=$id  | ||
| + | }  | ||
| + | |||
| + | |||
| + | ############# --> HeTh Testing out morphology in English  | ||
| + | index tekkom  | ||
| + | {  | ||
| + |         source                  = src1  | ||
| + |         path                    = /var/data/tekkom  | ||
| + |         docinfo                 = extern  | ||
| + |         mlock                   = 0  | ||
| + |         morphology              = none  | ||
| + |         min_word_len            = 1  | ||
| + |         charset_type            = sbcs  | ||
| + |         html_strip                              = 0  | ||
| + | }  | ||
| + | index tekkomstemmed : tekkom  | ||
| + | {  | ||
| + |         path                    = /var/data/tekkomstemmed  | ||
| + |         morphology              = stem_en  | ||
| + | }  | ||
| + | |||
| + | ############# <-- HeTh (Commented the simples index below)  | ||
| + | #index tekkom  | ||
| + | #{  | ||
| + | #       source                                  = src1  | ||
| + | #       path                                    = /var/data/tekkom  | ||
| + | #       docinfo                                 = extern  | ||
| + | #       charset_type                    = sbcs  | ||
| + | #}  | ||
| + | |||
| + | |||
| + | indexer  | ||
| + | {  | ||
| + |         # HeTh rasised to 64M from 32M 28/2-09  | ||
| + |         mem_limit                               = 64M  | ||
| + | }  | ||
| + | |||
| + | |||
| + | searchd  | ||
| + | {  | ||
| + |         listen                                  = 3312  | ||
| + |         log                                             = /var/log/searchd.log  | ||
| + |         query_log                               = /var/log/query.log  | ||
| + |         read_timeout                    = 5  | ||
| + |         max_children                    = 30  | ||
| + |         pid_file                                = /var/log/searchd.pid  | ||
| + |         max_matches                             = 1000  | ||
| + |         seamless_rotate                 = 1  | ||
| + |         preopen_indexes                 = 0  | ||
| + |         unlink_old                              = 1  | ||
| + | }  | ||
| + | </pre>  | ||
| + | == Indexing the search database ==  | ||
| + | <pre>  | ||
| + | [root@mars /usr/local/etc]#  indexer --config /usr/local/etc/sphinx.conf --all  | ||
| + | Sphinx 0.9.9-rc1 (r1566)  | ||
| + | Copyright (c) 2001-2008, Andrew Aksyonoff  | ||
| + | |||
| + | using config file '/usr/local/etc/sphinx.conf'...  | ||
| + | indexing index 'tekkom'...  | ||
| + | collected 2259 docs, 6.1 MB  | ||
| + | sorted 1.0 Mhits, 100.0% done  | ||
| + | total 2259 docs, 6148478 bytes  | ||
| + | total 0.826 sec, 7440030.11 bytes/sec, 2733.53 docs/sec  | ||
| + | indexing index 'tekkomstemmed'...  | ||
| + | collected 2259 docs, 6.1 MB  | ||
| + | sorted 1.0 Mhits, 100.0% done  | ||
| + | total 2259 docs, 6148478 bytes  | ||
| + | total 1.559 sec, 3944462.15 bytes/sec, 1449.23 docs/sec  | ||
| + | total 4 reads, 0.0 sec, 978.7 kb/read avg, 3.2 msec/read avg  | ||
| + | total 28 writes, 0.0 sec, 344.0 kb/write avg, 1.1 msec/write avg  | ||
| + | </pre>  | ||
| + | == Testing the search database ==  | ||
| + | <pre>  | ||
| + | time search "dhcp relay"  | ||
| + | search -q "dhcp relay"  | ||
| + | </pre>  | ||
| + | Works fine :-), but apparently shows history documents as well. Let's see later when searching the wiki.  | ||
| + | == Starting the search daemon manually ==  | ||
| + | <pre>  | ||
| + | searchd --config /usr/local/etc/sphinx.conf  | ||
| + | </pre>  | ||
| + | == Implementing sphinx in the wiki ==  | ||
Revision as of 10:40, 28 February 2009
- Installer and date: Henrik Thomsen 07:32, 28 February 2009 (UTC)
 - Following the instructions at MW:Extension:SphinxSearch
 - Documentation sphinx docs
 - Really good article on configuring sphinx
 - Downloaded Sphinx from: [1]
 - Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 on mars.tekkom.dk
 
Contents
Compiling
Sphinx installs by default in /usr/local and searches for the database libraries itself. No options are really necessary.
- Executed ./configure which ran all the tests successfully and generated the Makefiles.
 - Executed make to generate the binaries. It showed minor warnings, but ran all tests successfully
 - Executed make install showing output below
 
Making install in src if test -d ../.svn; then svn info .. --xml | perl svnxrev.pl; fi; make install-am test -z "/usr/local/bin" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/bin" /usr/bin/install -c 'indexer' '/usr/local/bin/indexer' /usr/bin/install -c 'searchd' '/usr/local/bin/searchd' /usr/bin/install -c 'search' '/usr/local/bin/search' /usr/bin/install -c 'spelldump' '/usr/local/bin/spelldump' Making install in test test -z "/usr/local/etc" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/etc" /usr/bin/install -c -m 644 'sphinx.conf.dist' '/usr/local/etc/sphinx.conf.dist' /usr/bin/install -c -m 644 'sphinx-min.conf.dist' '/usr/local/etc/sphinx-min.conf.dist' /usr/bin/install -c -m 644 'example.sql' '/usr/local/etc/example.sql' make install-data-hook mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log
- created directory /var/data/tekkom
 - created directory /var/data/tekkomstemmed (English morphology)
 
sphinx.conf
- /usr/local/etc/sphinx.conf
 
#
# Minimal Sphinx configuration sample (clean, simple, functional)
#
source src1
{
        type                                    = mysql
        sql_host                                = localhost
        sql_user                                = heth
        sql_pass                                = l8heise
        sql_db                                  = wikidb
#       sql_port                                = 3306  # optional, default is 3306
## --> HeTh inserted
        sql_query_pre   =
        sql_query       = \
          SELECT old_id, old_text\
          FROM text
        sql_query_post  =
        sql_query_info  = SELECT * FROM text WHERE old_id=$id
#<--HeTh
## --> HeTh Commented
#       sql_query                               = \
#               SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
#               FROM documents
#
#       sql_attr_uint                   = group_id
#       sql_attr_timestamp              = date_added
#<--HeTh
        sql_query_info                  = SELECT * FROM documents WHERE id=$id
}
############# --> HeTh Testing out morphology in English
index tekkom
{
        source                  = src1
        path                    = /var/data/tekkom
        docinfo                 = extern
        mlock                   = 0
        morphology              = none
        min_word_len            = 1
        charset_type            = sbcs
        html_strip                              = 0
}
index tekkomstemmed : tekkom
{
        path                    = /var/data/tekkomstemmed
        morphology              = stem_en
}
############# <-- HeTh (Commented the simples index below)
#index tekkom
#{
#       source                                  = src1
#       path                                    = /var/data/tekkom
#       docinfo                                 = extern
#       charset_type                    = sbcs
#}
indexer
{
        # HeTh raised to 64M from 32M 28/2-09
        mem_limit                               = 64M
}
searchd
{
        listen                                  = 3312
        log                                             = /var/log/searchd.log
        query_log                               = /var/log/query.log
        read_timeout                    = 5
        max_children                    = 30
        pid_file                                = /var/log/searchd.pid
        max_matches                             = 1000
        seamless_rotate                 = 1
        preopen_indexes                 = 0
        unlink_old                              = 1
}
Indexing the search database
[root@mars /usr/local/etc]# indexer --config /usr/local/etc/sphinx.conf --all Sphinx 0.9.9-rc1 (r1566) Copyright (c) 2001-2008, Andrew Aksyonoff using config file '/usr/local/etc/sphinx.conf'... indexing index 'tekkom'... collected 2259 docs, 6.1 MB sorted 1.0 Mhits, 100.0% done total 2259 docs, 6148478 bytes total 0.826 sec, 7440030.11 bytes/sec, 2733.53 docs/sec indexing index 'tekkomstemmed'... collected 2259 docs, 6.1 MB sorted 1.0 Mhits, 100.0% done total 2259 docs, 6148478 bytes total 1.559 sec, 3944462.15 bytes/sec, 1449.23 docs/sec total 4 reads, 0.0 sec, 978.7 kb/read avg, 3.2 msec/read avg total 28 writes, 0.0 sec, 344.0 kb/write avg, 1.1 msec/write avg
Testing the search database
time search "dhcp relay" search -q "dhcp relay"
Works fine :-), but apparently shows history documents as well. Let's see later when searching the wiki.
Starting the search daemon manually
searchd --config /usr/local/etc/sphinx.conf