|
|
|
|
"""
|
|
|
|
|
sphinx.search.ru
|
|
|
|
|
~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
Russian search language: includes the JS Russian stemmer.
|
|
|
|
|
|
|
|
|
|
:copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
|
|
|
|
|
:license: BSD, see LICENSE for details.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from typing import Dict
|
|
|
|
|
|
|
|
|
|
import snowballstemmer
|
|
|
|
|
|
|
|
|
|
from sphinx.search import SearchLanguage, parse_stop_word
|
|
|
|
|
|
|
|
|
|
# Russian stop words, obtained by handing the Snowball stop-word list (see the
# "source:" line inside the literal) to parse_stop_word.  Each entry line holds
# one word, then a `|`, then an English gloss; the second half of the literal
# lists pronoun paradigms and verb stems on lines that start with `|`.
# NOTE(review): presumably parse_stop_word drops everything from `|` to the end
# of each line, so the glosses and the `|`-prefixed tables do not become stop
# words -- confirm against sphinx.search.parse_stop_word.
russian_stopwords = parse_stop_word('''
|
|
|
|
|
| source: http://snowball.tartarus.org/algorithms/russian/stop.txt
|
|
|
|
|
и | and
|
|
|
|
|
в | in/into
|
|
|
|
|
во | alternative form
|
|
|
|
|
не | not
|
|
|
|
|
что | what/that
|
|
|
|
|
он | he
|
|
|
|
|
на | on/onto
|
|
|
|
|
я | i
|
|
|
|
|
с | from
|
|
|
|
|
со | alternative form
|
|
|
|
|
как | how
|
|
|
|
|
а | milder form of `no' (but)
|
|
|
|
|
то | conjunction and form of `that'
|
|
|
|
|
все | all
|
|
|
|
|
она | she
|
|
|
|
|
так | so, thus
|
|
|
|
|
его | him
|
|
|
|
|
но | but
|
|
|
|
|
да | yes/and
|
|
|
|
|
ты | thou
|
|
|
|
|
к | towards, by
|
|
|
|
|
у | around, chez
|
|
|
|
|
же | intensifier particle
|
|
|
|
|
вы | you
|
|
|
|
|
за | beyond, behind
|
|
|
|
|
бы | conditional/subj. particle
|
|
|
|
|
по | up to, along
|
|
|
|
|
только | only
|
|
|
|
|
ее | her
|
|
|
|
|
мне | to me
|
|
|
|
|
было | it was
|
|
|
|
|
вот | here is/are, particle
|
|
|
|
|
от | away from
|
|
|
|
|
меня | me
|
|
|
|
|
еще | still, yet, more
|
|
|
|
|
нет | no, there isnt/arent
|
|
|
|
|
о | about
|
|
|
|
|
из | out of
|
|
|
|
|
ему | to him
|
|
|
|
|
теперь | now
|
|
|
|
|
когда | when
|
|
|
|
|
даже | even
|
|
|
|
|
ну | so, well
|
|
|
|
|
вдруг | suddenly
|
|
|
|
|
ли | interrogative particle
|
|
|
|
|
если | if
|
|
|
|
|
уже | already, but homonym of `narrower'
|
|
|
|
|
или | or
|
|
|
|
|
ни | neither
|
|
|
|
|
быть | to be
|
|
|
|
|
был | he was
|
|
|
|
|
него | prepositional form of его
|
|
|
|
|
до | up to
|
|
|
|
|
вас | you accusative
|
|
|
|
|
нибудь | indef. suffix preceded by hyphen
|
|
|
|
|
опять | again
|
|
|
|
|
уж | already, but homonym of `adder'
|
|
|
|
|
вам | to you
|
|
|
|
|
сказал | he said
|
|
|
|
|
ведь | particle `after all'
|
|
|
|
|
там | there
|
|
|
|
|
потом | then
|
|
|
|
|
себя | oneself
|
|
|
|
|
ничего | nothing
|
|
|
|
|
ей | to her
|
|
|
|
|
может | usually with `быть' as `maybe'
|
|
|
|
|
они | they
|
|
|
|
|
тут | here
|
|
|
|
|
где | where
|
|
|
|
|
есть | there is/are
|
|
|
|
|
надо | got to, must
|
|
|
|
|
ней | prepositional form of ей
|
|
|
|
|
для | for
|
|
|
|
|
мы | we
|
|
|
|
|
тебя | thee
|
|
|
|
|
их | them, their
|
|
|
|
|
чем | than
|
|
|
|
|
была | she was
|
|
|
|
|
сам | self
|
|
|
|
|
чтоб | in order to
|
|
|
|
|
без | without
|
|
|
|
|
будто | as if
|
|
|
|
|
человек | man, person, one
|
|
|
|
|
чего | genitive form of `what'
|
|
|
|
|
раз | once
|
|
|
|
|
тоже | also
|
|
|
|
|
себе | to oneself
|
|
|
|
|
под | beneath
|
|
|
|
|
жизнь | life
|
|
|
|
|
будет | will be
|
|
|
|
|
ж | short form of intensifer particle `же'
|
|
|
|
|
тогда | then
|
|
|
|
|
кто | who
|
|
|
|
|
этот | this
|
|
|
|
|
говорил | was saying
|
|
|
|
|
того | genitive form of `that'
|
|
|
|
|
потому | for that reason
|
|
|
|
|
этого | genitive form of `this'
|
|
|
|
|
какой | which
|
|
|
|
|
совсем | altogether
|
|
|
|
|
ним | prepositional form of `его', `они'
|
|
|
|
|
здесь | here
|
|
|
|
|
этом | prepositional form of `этот'
|
|
|
|
|
один | one
|
|
|
|
|
почти | almost
|
|
|
|
|
мой | my
|
|
|
|
|
тем | instrumental/dative plural of `тот', `то'
|
|
|
|
|
чтобы | full form of `in order that'
|
|
|
|
|
нее | her (acc.)
|
|
|
|
|
кажется | it seems
|
|
|
|
|
сейчас | now
|
|
|
|
|
были | they were
|
|
|
|
|
куда | where to
|
|
|
|
|
зачем | why
|
|
|
|
|
сказать | to say
|
|
|
|
|
всех | all (acc., gen. preposn. plural)
|
|
|
|
|
никогда | never
|
|
|
|
|
сегодня | today
|
|
|
|
|
можно | possible, one can
|
|
|
|
|
при | by
|
|
|
|
|
наконец | finally
|
|
|
|
|
два | two
|
|
|
|
|
об | alternative form of `о', about
|
|
|
|
|
другой | another
|
|
|
|
|
хоть | even
|
|
|
|
|
после | after
|
|
|
|
|
над | above
|
|
|
|
|
больше | more
|
|
|
|
|
тот | that one (masc.)
|
|
|
|
|
через | across, in
|
|
|
|
|
эти | these
|
|
|
|
|
нас | us
|
|
|
|
|
про | about
|
|
|
|
|
всего | in all, only, of all
|
|
|
|
|
них | prepositional form of `они' (they)
|
|
|
|
|
какая | which, feminine
|
|
|
|
|
много | lots
|
|
|
|
|
разве | interrogative particle
|
|
|
|
|
сказала | she said
|
|
|
|
|
три | three
|
|
|
|
|
эту | this, acc. fem. sing.
|
|
|
|
|
моя | my, feminine
|
|
|
|
|
впрочем | moreover, besides
|
|
|
|
|
хорошо | good
|
|
|
|
|
свою | ones own, acc. fem. sing.
|
|
|
|
|
этой | oblique form of `эта', fem. `this'
|
|
|
|
|
перед | in front of
|
|
|
|
|
иногда | sometimes
|
|
|
|
|
лучше | better
|
|
|
|
|
чуть | a little
|
|
|
|
|
том | preposn. form of `that one'
|
|
|
|
|
нельзя | one must not
|
|
|
|
|
такой | such a one
|
|
|
|
|
им | to them
|
|
|
|
|
более | more
|
|
|
|
|
всегда | always
|
|
|
|
|
конечно | of course
|
|
|
|
|
всю | acc. fem. sing of `all'
|
|
|
|
|
между | between
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| b: some paradigms
|
|
|
|
|
|
|
|
|
|
|
| personal pronouns
|
|
|
|
|
|
|
|
|
|
|
| я меня мне мной [мною]
|
|
|
|
|
| ты тебя тебе тобой [тобою]
|
|
|
|
|
| он его ему им [него, нему, ним]
|
|
|
|
|
| она ее эи ею [нее, нэи, нею]
|
|
|
|
|
| оно его ему им [него, нему, ним]
|
|
|
|
|
|
|
|
|
|
|
| мы нас нам нами
|
|
|
|
|
| вы вас вам вами
|
|
|
|
|
| они их им ими [них, ним, ними]
|
|
|
|
|
|
|
|
|
|
|
| себя себе собой [собою]
|
|
|
|
|
|
|
|
|
|
|
| demonstrative pronouns: этот (this), тот (that)
|
|
|
|
|
|
|
|
|
|
|
| этот эта это эти
|
|
|
|
|
| этого эты это эти
|
|
|
|
|
| этого этой этого этих
|
|
|
|
|
| этому этой этому этим
|
|
|
|
|
| этим этой этим [этою] этими
|
|
|
|
|
| этом этой этом этих
|
|
|
|
|
|
|
|
|
|
|
| тот та то те
|
|
|
|
|
| того ту то те
|
|
|
|
|
| того той того тех
|
|
|
|
|
| тому той тому тем
|
|
|
|
|
| тем той тем [тою] теми
|
|
|
|
|
| том той том тех
|
|
|
|
|
|
|
|
|
|
|
| determinative pronouns
|
|
|
|
|
|
|
|
|
|
|
| (a) весь (all)
|
|
|
|
|
|
|
|
|
|
|
| весь вся все все
|
|
|
|
|
| всего всю все все
|
|
|
|
|
| всего всей всего всех
|
|
|
|
|
| всему всей всему всем
|
|
|
|
|
| всем всей всем [всею] всеми
|
|
|
|
|
| всем всей всем всех
|
|
|
|
|
|
|
|
|
|
|
| (b) сам (himself etc)
|
|
|
|
|
|
|
|
|
|
|
| сам сама само сами
|
|
|
|
|
| самого саму само самих
|
|
|
|
|
| самого самой самого самих
|
|
|
|
|
| самому самой самому самим
|
|
|
|
|
| самим самой самим [самою] самими
|
|
|
|
|
| самом самой самом самих
|
|
|
|
|
|
|
|
|
|
|
| stems of verbs `to be', `to have', `to do' and modal
|
|
|
|
|
|
|
|
|
|
|
| быть бы буд быв есть суть
|
|
|
|
|
| име
|
|
|
|
|
| дел
|
|
|
|
|
| мог мож мочь
|
|
|
|
|
| уме
|
|
|
|
|
| хоч хот
|
|
|
|
|
| долж
|
|
|
|
|
| можн
|
|
|
|
|
| нужн
|
|
|
|
|
| нельзя
|
|
|
|
|
''')
|
|
|
|
|
|
|
|
|
|
# Minified JavaScript source of the Russian stemmer.  Its last statement binds
# `Stemmer` to the `RussianStemmer` export of "src/russian-stemmer.jsx".  The
# string is published unchanged as `SearchRussian.js_stemmer_code`.
# NOTE(review): in this copy the minified code stops mid-expression right
# before the `var Stemmer = ...` line -- verify against the upstream source
# before relying on it.
js_stemmer = """
|
|
|
|
|
var JSX={};(function(h){function j(b,e){var a=function(){};a.prototype=e.prototype;var c=new a;for(var d in b){b[d].prototype=c}}function J(c,b){for(var a in b.prototype)if(b.prototype.hasOwnProperty(a))c.prototype[a]=b.prototype[a]}function f(a,b,d){function c(a,b,c){delete a[b];a[b]=c;return c}Object.defineProperty(a,b,{get:function(){return c(a,b,d())},set:function(d){c(a,b,d)},enumerable:true,configurable:true})}function K(a,b,c){return a[b]=a[b]/c|0}var p=parseInt;var z=parseFloat;function L(a){return a!==a}var x=isFinite;var w=encodeURIComponent;var u=decodeURIComponent;var t=encodeURI;var s=decodeURI;var B=Object.prototype.toString;var q=Object.prototype.hasOwnProperty;function i(){}h.require=function(b){var a=o[b];return a!==undefined?a:null};h.profilerIsRunning=function(){return i.getResults!=null};h.getProfileResults=function(){return(i.getResults||function(){return{}})()};h.postProfileResults=function(a,b){if(i.postResults==null)throw new Error('profiler has not been turned on');return i.postResults(a,b)};h.resetProfileResults=function(){if(i.resetResults==null)throw new Error('profiler has not been turned on');return i.resetResults()};h.DEBUG=false;function r(){};j([r],Error);function a(a,b,c){this.G=a.length;this.X=a;this.a=b;this.J=c;this.I=null;this.b=null};j([a],Object);function m(){};j([m],Object);function g(){var a;var b;var c;this.F={};a=this.D='';b=this._=0;c=this.A=a.length;this.E=0;this.B=b;this.C=c};j([g],m);function v(a,b){a.D=b.D;a._=b._;a.A=b.A;a.E=b.E;a.B=b.B;a.C=b.C};function k(b,d,c,e){var a;if(b._>=b.A){return false}a=b.D.charCodeAt(b._);if(a>e||a<c){return false}a-=c;if((d[a>>>3]&1<<(a&7))===0){return false}b._++;return true};function l(a,d,c,e){var b;if(a._>=a.A){return false}b=a.D.charCodeAt(a._);if(b>e||b<c){a._++;return true}b-=c;if((d[b>>>3]&1<<(b&7))===0){a._++;return true}return false};function d(a,b,d){var c;if(a._-a.E<b){return false}if(a.D.slice((c=a._)-b,c)!==d){return false}a._-=b;return true};function e(d,m,p){var b;var 
g;var e;var n;var f;var k;var l;var i;var h;var c;var a;var j;var o;b=0;g=p;e=d._;n=d.E;f=0;k=0;l=false;while(true){i=b+(g-b>>1);h=0;c=f<k?f:k;a=m[i];for(j=a.G-1-c;j>=0;j--){if(e-c===n){h=-1;break}h=d.D.charCodeAt(e-1-c)-a.X.charCodeAt(j);if(h!==0){break}c++}if(h<0){g=i;k=c}else{b=i;f=c}if(g-b<=1){if(b>0){break}if(g===b){break}if(l){break}l=true}}while(true){a=m[b];if(f>=a.G){d._=e-a.G|0;if(a.I==null){return a.J}o=a.I(d);d._=e-a.G|0;if(o){return a.J}}b=a.a;if(b<0){return 0}}return-1};function A(a,b,d,e){var c;c=e.length-(d-b);a.D=a.D.slice(0,b)+e+a.D.slice(d);a.A+=c|0;if(a._>=d){a._+=c|0}else if(a._>b){a._=b}return c|0};function c(a,f){var b;var c;var d;var e;b=false;if((c=a.B)<0||c>(d=a.C)||d>(e=a.A)||e>a.D.length?false:true){A(a,a.B,a.C,f);b=true}return b};g.prototype.H=function(){return false};g.prototype.Y=function(b){var a;var c;var d;var e;a=this.F['.'+b];if(a==null){c=this.D=b;d=this._=0;e=this.A=c.length;this.E=0;this.B=d;this.C=e;this.H();a=this.D;this.F['.'+b]=a}return a};g.prototype.stemWord=g.prototype.Y;g.prototype.Z=function(e){var d;var b;var c;var a;var f;var g;var h;d=[];for(b=0;b<e.length;b++){c=e[b];a=this.F['.'+c];if(a==null){f=this.D=c;g=this._=0;h=this.A=f.length;this.E=0;this.B=g;this.C=h;this.H();a=this.D;this.F['.'+c]=a}d.push(a)}return d};g.prototype.stemWords=g.prototype.Z;function b(){g.call(this);this.I_p2=0;this.I_pV=0};j([b],g);b.prototype.K=function(a){this.I_p2=a.I_p2;this.I_pV=a.I_pV;v(this,a)};b.prototype.copy_from=b.prototype.K;b.prototype.R=function(){var g;var a;var c;var d;var e;var f;var h;this.I_pV=h=this.A;this.I_p2=h;g=this._;a=true;a:while(a===true){a=false;b:while(true){c=true;c:while(c===true){c=false;if(!k(this,b.g_v,1072,1103)){break c}break b}if(this._>=this.A){break a}this._++}this.I_pV=this._;b:while(true){d=true;c:while(d===true){d=false;if(!l(this,b.g_v,1072,1103)){break c}break b}if(this._>=this.A){break a}this._++}b:while(true){e=true;c:while(e===true){e=false;if(!k(this,b.g_v,1072,1103)){break c}break 
b}if(this._>=this.A){break a}this._++}b:while(true){f=true;c:while(f===true){f=false;if(!l(this,b.g_v,1072
|
|
|
|
|
var Stemmer = JSX.require("src/russian-stemmer.jsx").RussianStemmer;
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SearchRussian(SearchLanguage):
    """Search-language definition for Russian (``'ru'``).

    Bundles the Russian stop-word set and the JavaScript stemmer source with
    a Python-side snowball stemmer for the same language.
    """

    lang = 'ru'
    language_name = 'Russian'
    js_stemmer_rawcode = 'russian-stemmer.js'
    js_stemmer_code = js_stemmer
    stopwords = russian_stopwords

    def init(self, options: Dict) -> None:
        """Create the snowball stemmer for Russian; *options* is not read here."""
        russian = snowballstemmer.stemmer('russian')
        self.stemmer = russian

    def stem(self, word: str) -> str:
        """Return the stem of *word*, lower-casing it before stemming."""
        lowered = word.lower()
        return self.stemmer.stemWord(lowered)