You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
153 lines
12 KiB
153 lines
12 KiB
4 years ago
"""
sphinx.search.sv
~~~~~~~~~~~~~~~~

Swedish search language: includes the JS Swedish stemmer.

:copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
||
|
|
||
|
from typing import Dict
|
||
|
|
||
|
import snowballstemmer
|
||
|
|
||
|
from sphinx.search import SearchLanguage, parse_stop_word
|
||
|
|
||
|
# Swedish stop words, produced by handing the table below to
# ``parse_stop_word``.  Each row is ``word | English gloss``; the table's
# first row credits the Snowball project's Swedish ``stop.txt`` as its source.
# NOTE(review): presumably ``parse_stop_word`` drops everything after ``|``
# on each row -- confirm against sphinx.search before editing the glosses.
swedish_stopwords = parse_stop_word('''
| source: http://snowball.tartarus.org/algorithms/swedish/stop.txt
och | and
det | it, this/that
att | to (with infinitive)
i | in, at
en | a
jag | I
hon | she
som | who, that
han | he
på | on
den | it, this/that
med | with
var | where, each
sig | him(self) etc
för | for
så | so (also: seed)
till | to
är | is
men | but
ett | a
om | if; around, about
hade | had
de | they, these/those
av | of
icke | not, no
mig | me
du | you
henne | her
då | then, when
sin | his
nu | now
har | have
inte | inte någon = no one
hans | his
honom | him
skulle | 'sake'
hennes | her
där | there
min | my
man | one (pronoun)
ej | nor
vid | at, by, on (also: vast)
kunde | could
något | some etc
från | from, off
ut | out
när | when
efter | after, behind
upp | up
vi | we
dem | them
vara | be
vad | what
över | over
än | than
dig | you
kan | can
sina | his
här | here
ha | have
mot | towards
alla | all
under | under (also: wonder)
någon | some etc
eller | or (else)
allt | all
mycket | much
sedan | since
ju | why
denna | this/that
själv | myself, yourself etc
detta | this/that
åt | to
utan | without
varit | was
hur | how
ingen | no
mitt | my
ni | you
bli | to be, become
blev | from bli
oss | us
din | thy
dessa | these/those
några | some etc
deras | their
blir | from bli
mina | my
samma | (the) same
vilken | who, that
er | you, your
sådan | such a
vår | our
blivit | from bli
dess | its
inom | within
mellan | between
sådant | such a
varför | why
varje | each
vilka | who, that
ditt | thy
vem | who
vilket | who, that
sitta | his
sådana | such a
vart | each
dina | thy
vars | whose
vårt | our
våra | our
ert | your
era | your
vilkas | whose
''')
|
||
|
|
||
|
js_stemmer = """
|
||
|
var JSX={};(function(e){function i(b,e){var a=function(){};a.prototype=e.prototype;var c=new a;for(var d in b){b[d].prototype=c}}function G(c,b){for(var a in b.prototype)if(b.prototype.hasOwnProperty(a))c.prototype[a]=b.prototype[a]}function h(a,b,d){function c(a,b,c){delete a[b];a[b]=c;return c}Object.defineProperty(a,b,{get:function(){return c(a,b,d())},set:function(d){c(a,b,d)},enumerable:true,configurable:true})}function F(a,b,c){return a[b]=a[b]/c|0}var t=parseInt;var u=parseFloat;function E(a){return a!==a}var x=isFinite;var y=encodeURIComponent;var z=decodeURIComponent;var B=encodeURI;var C=decodeURI;var o=Object.prototype.toString;var p=Object.prototype.hasOwnProperty;function f(){}e.require=function(b){var a=n[b];return a!==undefined?a:null};e.profilerIsRunning=function(){return f.getResults!=null};e.getProfileResults=function(){return(f.getResults||function(){return{}})()};e.postProfileResults=function(a,b){if(f.postResults==null)throw new Error('profiler has not been turned on');return f.postResults(a,b)};e.resetProfileResults=function(){if(f.resetResults==null)throw new Error('profiler has not been turned on');return f.resetResults()};e.DEBUG=false;function r(){};i([r],Error);function a(a,b,c){this.G=a.length;this.R=a;this.U=b;this.J=c;this.I=null;this.V=null};i([a],Object);function j(){};i([j],Object);function d(){var a;var b;var c;this.F={};a=this.C='';b=this._=0;c=this.B=a.length;this.A=0;this.D=b;this.E=c};i([d],j);function v(a,b){a.C=b.C;a._=b._;a.B=b.B;a.A=b.A;a.D=b.D;a.E=b.E};function k(b,d,c,e){var a;if(b._>=b.B){return false}a=b.C.charCodeAt(b._);if(a>e||a<c){return false}a-=c;if((d[a>>>3]&1<<(a&7))===0){return false}b._++;return true};function l(b,d,c,e){var a;if(b._<=b.A){return false}a=b.C.charCodeAt(b._-1);if(a>e||a<c){return false}a-=c;if((d[a>>>3]&1<<(a&7))===0){return false}b._--;return true};function m(a,d,c,e){var b;if(a._>=a.B){return false}b=a.C.charCodeAt(a._);if(b>e||b<c){a._++;return 
true}b-=c;if((d[b>>>3]&1<<(b&7))===0){a._++;return true}return false};function g(d,m,p){var b;var g;var e;var n;var f;var k;var l;var i;var h;var c;var a;var j;var o;b=0;g=p;e=d._;n=d.A;f=0;k=0;l=false;while(true){i=b+(g-b>>1);h=0;c=f<k?f:k;a=m[i];for(j=a.G-1-c;j>=0;j--){if(e-c===n){h=-1;break}h=d.C.charCodeAt(e-1-c)-a.R.charCodeAt(j);if(h!==0){break}c++}if(h<0){g=i;k=c}else{b=i;f=c}if(g-b<=1){if(b>0){break}if(g===b){break}if(l){break}l=true}}while(true){a=m[b];if(f>=a.G){d._=e-a.G|0;if(a.I==null){return a.J}o=a.I(d);d._=e-a.G|0;if(o){return a.J}}b=a.U;if(b<0){return 0}}return-1};function A(a,b,d,e){var c;c=e.length-(d-b);a.C=a.C.slice(0,b)+e+a.C.slice(d);a.B+=c|0;if(a._>=d){a._+=c|0}else if(a._>b){a._=b}return c|0};function c(a,f){var b;var c;var d;var e;b=false;if((c=a.D)<0||c>(d=a.E)||d>(e=a.B)||e>a.C.length?false:true){A(a,a.D,a.E,f);b=true}return b};d.prototype.H=function(){return false};d.prototype.S=function(b){var a;var c;var d;var e;a=this.F['.'+b];if(a==null){c=this.C=b;d=this._=0;e=this.B=c.length;this.A=0;this.D=d;this.E=e;this.H();a=this.C;this.F['.'+b]=a}return a};d.prototype.stemWord=d.prototype.S;d.prototype.T=function(e){var d;var b;var c;var a;var f;var g;var h;d=[];for(b=0;b<e.length;b++){c=e[b];a=this.F['.'+c];if(a==null){f=this.C=c;g=this._=0;h=this.B=f.length;this.A=0;this.D=g;this.E=h;this.H();a=this.C;this.F['.'+c]=a}d.push(a)}return d};d.prototype.stemWords=d.prototype.T;function b(){d.call(this);this.I_x=0;this.I_p1=0};i([b],d);b.prototype.K=function(a){this.I_x=a.I_x;this.I_p1=a.I_p1;v(this,a)};b.prototype.copy_from=b.prototype.K;b.prototype.P=function(){var g;var d;var a;var e;var c;var f;var i;var j;var l;var h;this.I_p1=j=this.B;g=i=this._;a=i+3|0;if(0>a||a>j){return false}h=this._=a;this.I_x=h;this._=g;a:while(true){d=this._;e=true;b:while(e===true){e=false;if(!k(this,b.g_v,97,246)){break b}this._=d;break a}l=this._=d;if(l>=this.B){return false}this._++}a:while(true){c=true;b:while(c===true){c=false;if(!m(this,b.g_v,97,246)){break 
b}break a}if(this._>=this.B){return false}this._++}this.I_p1=this._;f=true;a:while(f===true){f=false;if(!(this.I_p1<this.I_x)){break a}this.I_p1=th
|
||
|
var Stemmer = JSX.require("src/swedish-stemmer.jsx").SwedishStemmer;
|
||
|
"""
|
||
|
|
||
|
|
||
|
class SearchSwedish(SearchLanguage):
    """Search-language definition for Swedish (``'sv'``).

    Exposes the module's Swedish stop-word set and embedded JavaScript
    stemmer source as class attributes, and stems words on the Python side
    with ``snowballstemmer``.
    """

    # Language identification.
    lang = 'sv'
    language_name = 'Swedish'

    # Stop words parsed at module level.
    stopwords = swedish_stopwords

    # JavaScript stemmer: raw file name and the embedded source string.
    js_stemmer_rawcode = 'swedish-stemmer.js'
    js_stemmer_code = js_stemmer

    def init(self, options: Dict) -> None:
        """Create the Swedish Snowball stemmer and store it on the instance.

        ``options`` is accepted but not read by this implementation.
        """
        swedish = snowballstemmer.stemmer('swedish')
        self.stemmer = swedish

    def stem(self, word: str) -> str:
        """Return the stem of *word*, lower-casing it before stemming."""
        lowered = word.lower()
        return self.stemmer.stemWord(lowered)
|