You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ORPA-pyOpenRPA/Resources/WPy64-3720/python-3.7.2.amd64/Lib/site-packages/sphinx/search/nl.py

140 lines
21 KiB

"""
sphinx.search.nl
~~~~~~~~~~~~~~~~
Dutch search language: includes the JS porter stemmer.
:copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from typing import Dict
import snowballstemmer
from sphinx.search import SearchLanguage, parse_stop_word
dutch_stopwords = parse_stop_word('''
| source: http://snowball.tartarus.org/algorithms/dutch/stop.txt
de | the
en | and
van | of, from
ik | I, the ego
te | (1) chez, at etc, (2) to, (3) too
dat | that, which
die | that, those, who, which
in | in, inside
een | a, an, one
hij | he
het | the, it
niet | not, nothing, naught
zijn | (1) to be, being, (2) his, one's, its
is | is
was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
op | on, upon, at, in, up, used up
aan | on, upon, to (as dative)
met | with, by
als | like, such as, when
voor | (1) before, in front of, (2) furrow
had | had, past tense all persons sing. of 'hebben' (have)
er | there
maar | but, only
om | round, about, for etc
hem | him
dan | then
zou | should/would, past tense all persons sing. of 'zullen'
of | or, whether, if
wat | what, something, anything
mijn | possessive and noun 'mine'
men | people, 'one'
dit | this
zo | so, thus, in this way
door | through by
over | over, across
ze | she, her, they, them
zich | oneself
bij | (1) a bee, (2) by, near, at
ook | also, too
tot | till, until
je | you
mij | me
uit | out of, from
der | Old Dutch form of 'van der' still found in surnames
daar | (1) there, (2) because
haar | (1) her, their, them, (2) hair
naar | (1) unpleasant, unwell etc, (2) towards, (3) as
heb | present first person sing. of 'to have'
hoe | how, why
heeft | present third person sing. of 'to have'
hebben | 'to have' and various parts thereof
deze | this
u | you
want | (1) for, (2) mitten, (3) rigging
nog | yet, still
zal | 'shall', first and third person sing. of verb 'zullen' (will)
me | me
zij | she, they
nu | now
ge | 'thou', still used in Belgium and south Netherlands
geen | none
omdat | because
iets | something, somewhat
worden | to become, grow, get
toch | yet, still
al | all, every, each
waren | (1) 'were' (2) to wander, (3) wares, (3)
veel | much, many
meer | (1) more, (2) lake
doen | to do, to make
toen | then, when
moet | noun 'spot/mote' and present form of 'to must'
ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
zonder | without
kan | noun 'can' and present form of 'to be able'
hun | their, them
dus | so, consequently
alles | all, everything, anything
onder | under, beneath
ja | yes, of course
eens | once, one day
hier | here
wie | who
werd | imperfect third person sing. of 'become'
altijd | always
doch | yet, but etc
wordt | present third person sing. of 'become'
wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
kunnen | to be able
ons | us/our
zelf | self
tegen | against, towards, at
na | after, near
reeds | already
wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
kon | could; past tense of 'to be able'
niets | nothing
uw | your
iemand | somebody
geweest | been; past participle of 'be'
andere | other
''')
js_stemmer = """
var JSX={};(function(m){function n(b,e){var a=function(){};a.prototype=e.prototype;var c=new a;for(var d in b){b[d].prototype=c}}function L(c,b){for(var a in b.prototype)if(b.prototype.hasOwnProperty(a))c.prototype[a]=b.prototype[a]}function e(a,b,d){function c(a,b,c){delete a[b];a[b]=c;return c}Object.defineProperty(a,b,{get:function(){return c(a,b,d())},set:function(d){c(a,b,d)},enumerable:true,configurable:true})}function K(a,b,c){return a[b]=a[b]/c|0}var I=parseInt;var E=parseFloat;function M(a){return a!==a}var B=isFinite;var A=encodeURIComponent;var z=decodeURIComponent;var y=encodeURI;var x=decodeURI;var w=Object.prototype.toString;var C=Object.prototype.hasOwnProperty;function l(){}m.require=function(b){var a=t[b];return a!==undefined?a:null};m.profilerIsRunning=function(){return l.getResults!=null};m.getProfileResults=function(){return(l.getResults||function(){return{}})()};m.postProfileResults=function(a,b){if(l.postResults==null)throw new Error('profiler has not been turned on');return l.postResults(a,b)};m.resetProfileResults=function(){if(l.resetResults==null)throw new Error('profiler has not been turned on');return l.resetResults()};m.DEBUG=false;function v(){};n([v],Error);function c(a,b,c){this.F=a.length;this.K=a;this.L=b;this.I=c;this.H=null;this.P=null};n([c],Object);function s(){};n([s],Object);function g(){var a;var b;var c;this.G={};a=this.D='';b=this._=0;c=this.A=a.length;this.E=0;this.C=b;this.B=c};n([g],s);function D(a,b){a.D=b.D;a._=b._;a.A=b.A;a.E=b.E;a.C=b.C;a.B=b.B};function i(b,d,c,e){var a;if(b._>=b.A){return false}a=b.D.charCodeAt(b._);if(a>e||a<c){return false}a-=c;if((d[a>>>3]&1<<(a&7))===0){return false}b._++;return true};function r(a,d,c,e){var b;if(a._>=a.A){return false}b=a.D.charCodeAt(a._);if(b>e||b<c){a._++;return true}b-=c;if((d[b>>>3]&1<<(b&7))===0){a._++;return true}return false};function f(a,d,c,e){var b;if(a._<=a.E){return false}b=a.D.charCodeAt(a._-1);if(b>e||b<c){a._--;return true}b-=c;if((d[b>>>3]&1<<(b&7))===0){a._--;return true}return false};function k(a,b,d){var c;if(a.A-a._<b){return false}if(a.D.slice(c=a._,c+b)!==d){return false}a._+=b;return true};function d(a,b,d){var c;if(a._-a.E<b){return false}if(a.D.slice((c=a._)-b,c)!==d){return false}a._-=b;return true};function q(f,m,p){var b;var d;var e;var n;var g;var k;var l;var i;var h;var c;var a;var j;var o;b=0;d=p;e=f._;n=f.A;g=0;k=0;l=false;while(true){i=b+(d-b>>>1);h=0;c=g<k?g:k;a=m[i];for(j=c;j<a.F;j++){if(e+c===n){h=-1;break}h=f.D.charCodeAt(e+c)-a.K.charCodeAt(j);if(h!==0){break}c++}if(h<0){d=i;k=c}else{b=i;g=c}if(d-b<=1){if(b>0){break}if(d===b){break}if(l){break}l=true}}while(true){a=m[b];if(g>=a.F){f._=e+a.F|0;if(a.H==null){return a.I}o=a.H(a.P);f._=e+a.F|0;if(o){return a.I}}b=a.L;if(b<0){return 0}}return-1};function h(d,m,p){var b;var g;var e;var n;var f;var k;var l;var i;var h;var c;var a;var j;var o;b=0;g=p;e=d._;n=d.E;f=0;k=0;l=false;while(true){i=b+(g-b>>1);h=0;c=f<k?f:k;a=m[i];for(j=a.F-1-c;j>=0;j--){if(e-c===n){h=-1;break}h=d.D.charCodeAt(e-1-c)-a.K.charCodeAt(j);if(h!==0){break}c++}if(h<0){g=i;k=c}else{b=i;f=c}if(g-b<=1){if(b>0){break}if(g===b){break}if(l){break}l=true}}while(true){a=m[b];if(f>=a.F){d._=e-a.F|0;if(a.H==null){return a.I}o=a.H(d);d._=e-a.F|0;if(o){return a.I}}b=a.L;if(b<0){return 0}}return-1};function u(a,b,d,e){var c;c=e.length-(d-b);a.D=a.D.slice(0,b)+e+a.D.slice(d);a.A+=c|0;if(a._>=d){a._+=c|0}else if(a._>b){a._=b}return c|0};function b(a,f){var b;var c;var d;var e;b=false;if((c=a.C)<0||c>(d=a.B)||d>(e=a.A)||e>a.D.length?false:true){u(a,a.C,a.B,f);b=true}return b};g.prototype.J=function(){return false};g.prototype.Z=function(b){var a;var c;var d;var e;a=this.G['.'+b];if(a==null){c=this.D=b;d=this._=0;e=this.A=c.length;this.E=0;this.C=d;this.B=e;this.J();a=this.D;this.G['.'+b]=a}return a};g.prototype.stemWord=g.prototype.Z;g.prototype.a=function(e){var d;var b;var c;var a;var f;var g;var h;d=[];for(b=0;b<e.length;b++){c=e[b];a=this.G['.'+c];if(a==null){f=this.D=c;g=this._=0;h=this.A=f.length;this.E=0;this.C=g;this.B=h;this.J();a=this.D;this.G['.'+c]=a}d.push(a)}return d};g.prototype.st
var Stemmer = JSX.require("src/dutch-stemmer.jsx").DutchStemmer;
"""
class SearchDutch(SearchLanguage):
lang = 'nl'
language_name = 'Dutch'
js_stemmer_rawcode = 'dutch-stemmer.js'
js_stemmer_code = js_stemmer
stopwords = dutch_stopwords
def init(self, options: Dict) -> None:
self.stemmer = snowballstemmer.stemmer('dutch')
def stem(self, word: str) -> str:
return self.stemmer.stemWord(word.lower())