📄 client.rb

📁 做搜索的
💻 RB
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
# = client.rb - Sphinx Client API
# 
# Author::    Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
# License::   Distributes under the same terms as Ruby
# Version::   0.9.8
# Website::   http://kpumuk.info/projects/ror-plugins/sphinx
#
# This library is distributed under the terms of the Ruby license.
# You can freely distribute/modify this library.

# ==Sphinx Client API
# 
# The Sphinx Client API is used to communicate with <tt>searchd</tt>
# daemon and get search results from Sphinx.
# 
# ===Usage
# 
#   sphinx = Sphinx::Client.new
#   result = sphinx.Query('test')
#   ids = result['matches'].map { |match| match['id'] }.join(',')
#   posts = Post.find :all, :conditions => "id IN (#{ids})"
#   
#   docs = posts.map(&:body)
#   excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')

require 'socket'

module Sphinx
  # :stopdoc:

  class SphinxError < StandardError; end
  class SphinxArgumentError < SphinxError; end
  class SphinxConnectError < SphinxError; end
  class SphinxResponseError < SphinxError; end
  class SphinxInternalError < SphinxError; end
  class SphinxTemporaryError < SphinxError; end
  class SphinxUnknownError < SphinxError; end

  # :startdoc:

  class Client
  
    # :stopdoc:
  
    # Known searchd commands
  
    # search command
    SEARCHD_COMMAND_SEARCH   = 0
    # excerpt command
    SEARCHD_COMMAND_EXCERPT  = 1
    # update command
    SEARCHD_COMMAND_UPDATE   = 2 
    # keywords command
    SEARCHD_COMMAND_KEYWORDS = 3 
  
    # Current client-side command implementation versions
    
    # search command version
    VER_COMMAND_SEARCH   = 0x113
    # excerpt command version
    VER_COMMAND_EXCERPT  = 0x100
    # update command version
    VER_COMMAND_UPDATE   = 0x101
    # keywords command version
    VER_COMMAND_KEYWORDS = 0x100
    
    # Known searchd status codes
  
    # general success, command-specific reply follows
    SEARCHD_OK      = 0
    # general failure, command-specific reply may follow
    SEARCHD_ERROR   = 1
    # temporaty failure, client should retry later
    SEARCHD_RETRY   = 2
    # general success, warning message and command-specific reply follow 
    SEARCHD_WARNING = 3    
    
    # :startdoc:
  
    # Known match modes
  
    # match all query words
    SPH_MATCH_ALL       = 0 
    # match any query word
    SPH_MATCH_ANY       = 1 
    # match this exact phrase
    SPH_MATCH_PHRASE    = 2 
    # match this boolean query
    SPH_MATCH_BOOLEAN   = 3 
    # match this extended query
    SPH_MATCH_EXTENDED  = 4 
    # match all document IDs w/o fulltext query, apply filters
    SPH_MATCH_FULLSCAN  = 5
    # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
    SPH_MATCH_EXTENDED2 = 6
    
    # Known ranking modes (ext2 only)
  
    # default mode, phrase proximity major factor and BM25 minor one
    SPH_RANK_PROXIMITY_BM25 = 0
    # statistical mode, BM25 ranking only (faster but worse quality)
    SPH_RANK_BM25           = 1
    # no ranking, all matches get a weight of 1
    SPH_RANK_NONE           = 2
    # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts
    SPH_RANK_WORDCOUNT      = 3
    
    # Known sort modes
  
    # sort by document relevance desc, then by date
    SPH_SORT_RELEVANCE     = 0
    # sort by document date desc, then by relevance desc
    SPH_SORT_ATTR_DESC     = 1
    # sort by document date asc, then by relevance desc
    SPH_SORT_ATTR_ASC      = 2
    # sort by time segments (hour/day/week/etc) desc, then by relevance desc
    SPH_SORT_TIME_SEGMENTS = 3
    # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
    SPH_SORT_EXTENDED      = 4
    # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
    SPH_SORT_EXPR          = 5
    
    # Known filter types
    
    # filter by integer values set
    SPH_FILTER_VALUES      = 0
    # filter by integer range
    SPH_FILTER_RANGE       = 1
    # filter by float range
    SPH_FILTER_FLOATRANGE  = 2
    
    # Known attribute types
  
    # this attr is just an integer
    SPH_ATTR_INTEGER   = 1
    # this attr is a timestamp
    SPH_ATTR_TIMESTAMP = 2
    # this attr is an ordinal string number (integer at search time, 
    # specially handled at indexing time)
    SPH_ATTR_ORDINAL   = 3
    # this attr is a boolean bit field
    SPH_ATTR_BOOL      = 4
    # this attr is a float
    SPH_ATTR_FLOAT     = 5
    # this attr has multiple values (0 or more)
    SPH_ATTR_MULTI     = 0x40000000
    
    # Known grouping functions
  
    # group by day
    SPH_GROUPBY_DAY      = 0
    # group by week
    SPH_GROUPBY_WEEK     = 1 
    # group by month
    SPH_GROUPBY_MONTH    = 2 
    # group by year
    SPH_GROUPBY_YEAR     = 3
    # group by attribute value
    SPH_GROUPBY_ATTR     = 4
    # group by sequential attrs pair
    SPH_GROUPBY_ATTRPAIR = 5
    
    # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values. 
    def initialize
      # per-client-object settings
      @host          = 'localhost'             # searchd host (default is "localhost")
      @port          = 3312                    # searchd port (default is 3312)
      
      # per-query settings
      @offset        = 0                       # how many records to seek from result-set start (default is 0)
      @limit         = 20                      # how many records to return from result-set starting at offset (default is 20)
      @mode          = SPH_MATCH_ALL           # query matching mode (default is SPH_MATCH_ALL)
      @weights       = []                      # per-field weights (default is 1 for all fields)
      @sort          = SPH_SORT_RELEVANCE      # match sorting mode (default is SPH_SORT_RELEVANCE)
      @sortby        = ''                      # attribute to sort by (defualt is "")
      @min_id        = 0                       # min ID to match (default is 0, which means no limit)
      @max_id        = 0                       # max ID to match (default is 0, which means no limit)
      @filters       = []                      # search filters
      @groupby       = ''                      # group-by attribute name
      @groupfunc     = SPH_GROUPBY_DAY         # function to pre-process group-by attribute value with
      @groupsort     = '@group desc'           # group-by sorting clause (to sort groups in result set with)
      @groupdistinct = ''                      # group-by count-distinct attribute
      @maxmatches    = 1000                    # max matches to retrieve
      @cutoff        = 0                       # cutoff to stop searching at (default is 0)
      @retrycount    = 0                       # distributed retries count
      @retrydelay    = 0                       # distributed retries delay
      @anchor        = []                      # geographical anchor point
      @indexweights  = []                      # per-index weights
      @ranker        = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
      @maxquerytime  = 0                       # max query time, milliseconds (default is 0, do not limit) 
      @fieldweights  = {}                      # per-field-name weights
    
      # per-reply fields (for single-query case)
      @error         = ''                      # last error message
      @warning       = ''                      # last warning message
      
      @reqs          = []                      # requests storage (for multi-query case)
      @mbenc         = ''                      # stored mbstring encoding
    end
  
    # Get last error message.
    def GetLastError
      @error
    end
    
    # Get last warning message.
    def GetLastWarning
      @warning
    end
    
    # Set searchd host name (string) and port (integer).
    def SetServer(host, port)
      assert { host.instance_of? String }
      assert { port.instance_of? Fixnum }

      @host = host
      @port = port
    end
   
    # Set offset and count into result set,
    # and optionally set max-matches and cutoff limits.
    def SetLimits(offset, limit, max = 0, cutoff = 0)
      assert { offset.instance_of? Fixnum }
      assert { limit.instance_of? Fixnum }
      assert { max.instance_of? Fixnum }
      assert { offset >= 0 }
      assert { limit > 0 }
      assert { max >= 0 }

      @offset = offset
      @limit = limit
      @maxmatches = max if max > 0
      @cutoff = cutoff if cutoff > 0
    end
    
    # Set maximum query time, in milliseconds, per-index,
    # integer, 0 means "do not limit"
    def SetMaxQueryTime(max)
      assert { max.instance_of? Fixnum }
      assert { max >= 0 }
      @maxquerytime = max
    end
    
    # Set matching mode.
    def SetMatchMode(mode)
      assert { mode == SPH_MATCH_ALL \
            || mode == SPH_MATCH_ANY \
            || mode == SPH_MATCH_PHRASE \
            || mode == SPH_MATCH_BOOLEAN \
            || mode == SPH_MATCH_EXTENDED \
            || mode == SPH_MATCH_FULLSCAN \
            || mode == SPH_MATCH_EXTENDED2 }

      @mode = mode
    end
    
    # Set ranking mode.
    def SetRankingMode(ranker)
      assert { ranker == SPH_RANK_PROXIMITY_BM25 \
            || ranker == SPH_RANK_BM25 \
            || ranker == SPH_RANK_NONE \
            || ranker == SPH_RANK_WORDCOUNT }

      @ranker = ranker
    end
    
    # Set matches sorting mode.
    def SetSortMode(mode, sortby = '')
      assert { mode == SPH_SORT_RELEVANCE \
            || mode == SPH_SORT_ATTR_DESC \
            || mode == SPH_SORT_ATTR_ASC \
            || mode == SPH_SORT_TIME_SEGMENTS \
            || mode == SPH_SORT_EXTENDED \
            || mode == SPH_SORT_EXPR }
      assert { sortby.instance_of? String }
      assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }

      @sort = mode
      @sortby = sortby
    end
    
    # Bind per-field weights by order.
    #
    # DEPRECATED; use SetFieldWeights() instead.
    def SetWeights(weights)
      assert { weights.instance_of? Array }
      weights.each do |weight|
        assert { weight.instance_of? Fixnum }
      end

      @weights = weights
    end

    # Bind per-field weights by name.
    #
    # Takes string (field name) to integer name (field weight) hash as an argument.
    # * Takes precedence over SetWeights().
    # * Unknown names will be silently ignored.
    # * Unbound fields will be silently given a weight of 1.
    def SetFieldWeights(weights)
      assert { weights.instance_of? Hash }
      weights.each do |name, weight|
        assert { name.instance_of? String }
        assert { weight.instance_of? Fixnum }
      end

      @fieldweights = weights
    end
    
    # Bind per-index weights by name.
    def SetIndexWeights(weights)
      assert { weights.instance_of? Hash }
      weights.each do |index, weight|
        assert { index.instance_of? String }
        assert { weight.instance_of? Fixnum }
      end
      
      @indexweights = weights
    end
    
    # Set IDs range to match.
    # 
    # Only match records if document ID is beetwen <tt>min_id</tt> and <tt>max_id</tt> (inclusive). 
    def SetIDRange(min, max)
      assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
      assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
      assert { min <= max }

      @min_id = min
      @max_id = max
    end
    
    # Set values filter.
    # 
    # Only match those records where <tt>attribute</tt> column values
    # are in specified set.
    def SetFilter(attribute, values, exclude = false)
      assert { attribute.instance_of? String }
      assert { values.instance_of? Array }
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -