You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
376 lines
28 KiB
376 lines
28 KiB
4 years ago
|
"""
|
||
|
sphinx.search.es
|
||
|
~~~~~~~~~~~~~~~~
|
||
|
|
||
|
Spanish search language: includes the JS Spanish stemmer.
|
||
|
|
||
|
:copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
|
||
|
:license: BSD, see LICENSE for details.
|
||
|
"""
|
||
|
|
||
|
from typing import Dict
|
||
|
|
||
|
import snowballstemmer
|
||
|
|
||
|
from sphinx.search import SearchLanguage, parse_stop_word
|
||
|
|
||
|
# Spanish stop words, built by parse_stop_word from the Snowball list whose URL
# is given on the first line of the literal. Each entry is a word optionally
# followed by `| gloss`; lines that start with `|` carry no word (presumably
# parse_stop_word treats everything after `|` as a comment -- confirm against
# its definition). Words that appear only as `|` comments in the first part of
# the list (e.g. `| es from SER`) are listed later under the matching
# "forms of ..." verb section.
spanish_stopwords = parse_stop_word('''
|
||
|
|source: http://snowball.tartarus.org/algorithms/spanish/stop.txt
|
||
|
de | from, of
|
||
|
la | the, her
|
||
|
que | who, that
|
||
|
el | the
|
||
|
en | in
|
||
|
y | and
|
||
|
a | to
|
||
|
los | the, them
|
||
|
del | de + el
|
||
|
se | himself, from him etc
|
||
|
las | the, them
|
||
|
por | for, by, etc
|
||
|
un | a
|
||
|
para | for
|
||
|
con | with
|
||
|
no | no
|
||
|
una | a
|
||
|
su | his, her
|
||
|
al | a + el
|
||
|
| es from SER
|
||
|
lo | him
|
||
|
como | how
|
||
|
más | more
|
||
|
pero | pero
|
||
|
sus | su plural
|
||
|
le | to him, her
|
||
|
ya | already
|
||
|
o | or
|
||
|
| fue from SER
|
||
|
este | this
|
||
|
| ha from HABER
|
||
|
sí | himself etc
|
||
|
porque | because
|
||
|
esta | this
|
||
|
| son from SER
|
||
|
entre | between
|
||
|
| está from ESTAR
|
||
|
cuando | when
|
||
|
muy | very
|
||
|
sin | without
|
||
|
sobre | on
|
||
|
| ser from SER
|
||
|
| tiene from TENER
|
||
|
también | also
|
||
|
me | me
|
||
|
hasta | until
|
||
|
hay | there is/are
|
||
|
donde | where
|
||
|
| han from HABER
|
||
|
quien | whom, that
|
||
|
| están from ESTAR
|
||
|
| estado from ESTAR
|
||
|
desde | from
|
||
|
todo | all
|
||
|
nos | us
|
||
|
durante | during
|
||
|
| estados from ESTAR
|
||
|
todos | all
|
||
|
uno | a
|
||
|
les | to them
|
||
|
ni | nor
|
||
|
contra | against
|
||
|
otros | other
|
||
|
| fueron from SER
|
||
|
ese | that
|
||
|
eso | that
|
||
|
| había from HABER
|
||
|
ante | before
|
||
|
ellos | they
|
||
|
e | and (variant of y)
|
||
|
esto | this
|
||
|
mí | me
|
||
|
antes | before
|
||
|
algunos | some
|
||
|
qué | what?
|
||
|
unos | a
|
||
|
yo | I
|
||
|
otro | other
|
||
|
otras | other
|
||
|
otra | other
|
||
|
él | he
|
||
|
tanto | so much, many
|
||
|
esa | that
|
||
|
estos | these
|
||
|
mucho | much, many
|
||
|
quienes | who
|
||
|
nada | nothing
|
||
|
muchos | many
|
||
|
cual | who
|
||
|
| sea from SER
|
||
|
poco | few
|
||
|
ella | she
|
||
|
estar | to be
|
||
|
| haber from HABER
|
||
|
estas | these
|
||
|
| estaba from ESTAR
|
||
|
| estamos from ESTAR
|
||
|
algunas | some
|
||
|
algo | something
|
||
|
nosotros | we
|
||
|
|
||
|
| other forms
|
||
|
|
||
|
mi | me
|
||
|
mis | mi plural
|
||
|
tú | thou
|
||
|
te | thee
|
||
|
ti | thee
|
||
|
tu | thy
|
||
|
tus | tu plural
|
||
|
ellas | they
|
||
|
nosotras | we
|
||
|
vosotros | you
|
||
|
vosotras | you
|
||
|
os | you
|
||
|
mío | mine
|
||
|
mía |
|
||
|
míos |
|
||
|
mías |
|
||
|
tuyo | thine
|
||
|
tuya |
|
||
|
tuyos |
|
||
|
tuyas |
|
||
|
suyo | his, hers, theirs
|
||
|
suya |
|
||
|
suyos |
|
||
|
suyas |
|
||
|
nuestro | ours
|
||
|
nuestra |
|
||
|
nuestros |
|
||
|
nuestras |
|
||
|
vuestro | yours
|
||
|
vuestra |
|
||
|
vuestros |
|
||
|
vuestras |
|
||
|
esos | those
|
||
|
esas | those
|
||
|
|
||
|
| forms of estar, to be (not including the infinitive):
|
||
|
estoy
|
||
|
estás
|
||
|
está
|
||
|
estamos
|
||
|
estáis
|
||
|
están
|
||
|
esté
|
||
|
estés
|
||
|
estemos
|
||
|
estéis
|
||
|
estén
|
||
|
estaré
|
||
|
estarás
|
||
|
estará
|
||
|
estaremos
|
||
|
estaréis
|
||
|
estarán
|
||
|
estaría
|
||
|
estarías
|
||
|
estaríamos
|
||
|
estaríais
|
||
|
estarían
|
||
|
estaba
|
||
|
estabas
|
||
|
estábamos
|
||
|
estabais
|
||
|
estaban
|
||
|
estuve
|
||
|
estuviste
|
||
|
estuvo
|
||
|
estuvimos
|
||
|
estuvisteis
|
||
|
estuvieron
|
||
|
estuviera
|
||
|
estuvieras
|
||
|
estuviéramos
|
||
|
estuvierais
|
||
|
estuvieran
|
||
|
estuviese
|
||
|
estuvieses
|
||
|
estuviésemos
|
||
|
estuvieseis
|
||
|
estuviesen
|
||
|
estando
|
||
|
estado
|
||
|
estada
|
||
|
estados
|
||
|
estadas
|
||
|
estad
|
||
|
|
||
|
| forms of haber, to have (not including the infinitive):
|
||
|
he
|
||
|
has
|
||
|
ha
|
||
|
hemos
|
||
|
habéis
|
||
|
han
|
||
|
haya
|
||
|
hayas
|
||
|
hayamos
|
||
|
hayáis
|
||
|
hayan
|
||
|
habré
|
||
|
habrás
|
||
|
habrá
|
||
|
habremos
|
||
|
habréis
|
||
|
habrán
|
||
|
habría
|
||
|
habrías
|
||
|
habríamos
|
||
|
habríais
|
||
|
habrían
|
||
|
había
|
||
|
habías
|
||
|
habíamos
|
||
|
habíais
|
||
|
habían
|
||
|
hube
|
||
|
hubiste
|
||
|
hubo
|
||
|
hubimos
|
||
|
hubisteis
|
||
|
hubieron
|
||
|
hubiera
|
||
|
hubieras
|
||
|
hubiéramos
|
||
|
hubierais
|
||
|
hubieran
|
||
|
hubiese
|
||
|
hubieses
|
||
|
hubiésemos
|
||
|
hubieseis
|
||
|
hubiesen
|
||
|
habiendo
|
||
|
habido
|
||
|
habida
|
||
|
habidos
|
||
|
habidas
|
||
|
|
||
|
| forms of ser, to be (not including the infinitive):
|
||
|
soy
|
||
|
eres
|
||
|
es
|
||
|
somos
|
||
|
sois
|
||
|
son
|
||
|
sea
|
||
|
seas
|
||
|
seamos
|
||
|
seáis
|
||
|
sean
|
||
|
seré
|
||
|
serás
|
||
|
será
|
||
|
seremos
|
||
|
seréis
|
||
|
serán
|
||
|
sería
|
||
|
serías
|
||
|
seríamos
|
||
|
seríais
|
||
|
serían
|
||
|
era
|
||
|
eras
|
||
|
éramos
|
||
|
erais
|
||
|
eran
|
||
|
fui
|
||
|
fuiste
|
||
|
fue
|
||
|
fuimos
|
||
|
fuisteis
|
||
|
fueron
|
||
|
fuera
|
||
|
fueras
|
||
|
fuéramos
|
||
|
fuerais
|
||
|
fueran
|
||
|
fuese
|
||
|
fueses
|
||
|
fuésemos
|
||
|
fueseis
|
||
|
fuesen
|
||
|
siendo
|
||
|
sido
|
||
|
| sed also means 'thirst'
|
||
|
|
||
|
| forms of tener, to have (not including the infinitive):
|
||
|
tengo
|
||
|
tienes
|
||
|
tiene
|
||
|
tenemos
|
||
|
tenéis
|
||
|
tienen
|
||
|
tenga
|
||
|
tengas
|
||
|
tengamos
|
||
|
tengáis
|
||
|
tengan
|
||
|
tendré
|
||
|
tendrás
|
||
|
tendrá
|
||
|
tendremos
|
||
|
tendréis
|
||
|
tendrán
|
||
|
tendría
|
||
|
tendrías
|
||
|
tendríamos
|
||
|
tendríais
|
||
|
tendrían
|
||
|
tenía
|
||
|
tenías
|
||
|
teníamos
|
||
|
teníais
|
||
|
tenían
|
||
|
tuve
|
||
|
tuviste
|
||
|
tuvo
|
||
|
tuvimos
|
||
|
tuvisteis
|
||
|
tuvieron
|
||
|
tuviera
|
||
|
tuvieras
|
||
|
tuviéramos
|
||
|
tuvierais
|
||
|
tuvieran
|
||
|
tuviese
|
||
|
tuvieses
|
||
|
tuviésemos
|
||
|
tuvieseis
|
||
|
tuviesen
|
||
|
teniendo
|
||
|
tenido
|
||
|
tenida
|
||
|
tenidos
|
||
|
tenidas
|
||
|
tened
|
||
|
''')
|
||
|
|
||
|
# Minified JavaScript for the Spanish stemmer; SearchSpanish exposes this
# string as its `js_stemmer_code`. The final statement binds `Stemmer` to the
# `SpanishStemmer` export obtained through `JSX.require`.
# NOTE(review): as seen here the minified code stops mid-function right before
# the `var Stemmer` line -- it looks truncated; confirm against upstream.
js_stemmer = """
|
||
|
var JSX={};(function(k){function l(b,e){var a=function(){};a.prototype=e.prototype;var c=new a;for(var d in b){b[d].prototype=c}}function I(c,b){for(var a in b.prototype)if(b.prototype.hasOwnProperty(a))c.prototype[a]=b.prototype[a]}function g(a,b,d){function c(a,b,c){delete a[b];a[b]=c;return c}Object.defineProperty(a,b,{get:function(){return c(a,b,d())},set:function(d){c(a,b,d)},enumerable:true,configurable:true})}function J(a,b,c){return a[b]=a[b]/c|0}var p=parseInt;var z=parseFloat;function K(a){return a!==a}var x=isFinite;var w=encodeURIComponent;var u=decodeURIComponent;var t=encodeURI;var s=decodeURI;var A=Object.prototype.toString;var q=Object.prototype.hasOwnProperty;function j(){}k.require=function(b){var a=o[b];return a!==undefined?a:null};k.profilerIsRunning=function(){return j.getResults!=null};k.getProfileResults=function(){return(j.getResults||function(){return{}})()};k.postProfileResults=function(a,b){if(j.postResults==null)throw new Error('profiler has not been turned on');return j.postResults(a,b)};k.resetProfileResults=function(){if(j.resetResults==null)throw new Error('profiler has not been turned on');return j.resetResults()};k.DEBUG=false;function r(){};l([r],Error);function a(a,b,c){this.F=a.length;this.K=a;this.L=b;this.I=c;this.H=null;this.P=null};l([a],Object);function m(){};l([m],Object);function i(){var a;var b;var c;this.G={};a=this.E='';b=this._=0;c=this.A=a.length;this.D=0;this.B=b;this.C=c};l([i],m);function v(a,b){a.E=b.E;a._=b._;a.A=b.A;a.D=b.D;a.B=b.B;a.C=b.C};function f(b,d,c,e){var a;if(b._>=b.A){return false}a=b.E.charCodeAt(b._);if(a>e||a<c){return false}a-=c;if((d[a>>>3]&1<<(a&7))===0){return false}b._++;return true};function h(a,d,c,e){var b;if(a._>=a.A){return false}b=a.E.charCodeAt(a._);if(b>e||b<c){a._++;return true}b-=c;if((d[b>>>3]&1<<(b&7))===0){a._++;return true}return false};function d(a,b,d){var c;if(a._-a.D<b){return false}if(a.E.slice((c=a._)-b,c)!==d){return false}a._-=b;return true};function n(f,m,p){var b;var 
d;var e;var n;var g;var k;var l;var i;var h;var c;var a;var j;var o;b=0;d=p;e=f._;n=f.A;g=0;k=0;l=false;while(true){i=b+(d-b>>>1);h=0;c=g<k?g:k;a=m[i];for(j=c;j<a.F;j++){if(e+c===n){h=-1;break}h=f.E.charCodeAt(e+c)-a.K.charCodeAt(j);if(h!==0){break}c++}if(h<0){d=i;k=c}else{b=i;g=c}if(d-b<=1){if(b>0){break}if(d===b){break}if(l){break}l=true}}while(true){a=m[b];if(g>=a.F){f._=e+a.F|0;if(a.H==null){return a.I}o=a.H(a.P);f._=e+a.F|0;if(o){return a.I}}b=a.L;if(b<0){return 0}}return-1};function e(d,m,p){var b;var g;var e;var n;var f;var k;var l;var i;var h;var c;var a;var j;var o;b=0;g=p;e=d._;n=d.D;f=0;k=0;l=false;while(true){i=b+(g-b>>1);h=0;c=f<k?f:k;a=m[i];for(j=a.F-1-c;j>=0;j--){if(e-c===n){h=-1;break}h=d.E.charCodeAt(e-1-c)-a.K.charCodeAt(j);if(h!==0){break}c++}if(h<0){g=i;k=c}else{b=i;f=c}if(g-b<=1){if(b>0){break}if(g===b){break}if(l){break}l=true}}while(true){a=m[b];if(f>=a.F){d._=e-a.F|0;if(a.H==null){return a.I}o=a.H(d);d._=e-a.F|0;if(o){return a.I}}b=a.L;if(b<0){return 0}}return-1};function B(a,b,d,e){var c;c=e.length-(d-b);a.E=a.E.slice(0,b)+e+a.E.slice(d);a.A+=c|0;if(a._>=d){a._+=c|0}else if(a._>b){a._=b}return c|0};function c(a,f){var b;var c;var d;var e;b=false;if((c=a.B)<0||c>(d=a.C)||d>(e=a.A)||e>a.E.length?false:true){B(a,a.B,a.C,f);b=true}return b};i.prototype.J=function(){return false};i.prototype.a=function(b){var a;var c;var d;var e;a=this.G['.'+b];if(a==null){c=this.E=b;d=this._=0;e=this.A=c.length;this.D=0;this.B=d;this.C=e;this.J();a=this.E;this.G['.'+b]=a}return a};i.prototype.stemWord=i.prototype.a;i.prototype.b=function(e){var d;var b;var c;var a;var f;var g;var h;d=[];for(b=0;b<e.length;b++){c=e[b];a=this.G['.'+c];if(a==null){f=this.E=c;g=this._=0;h=this.A=f.length;this.D=0;this.B=g;this.C=h;this.J();a=this.E;this.G['.'+c]=a}d.push(a)}return d};i.prototype.stemWords=i.prototype.b;function 
b(){i.call(this);this.I_p2=0;this.I_p1=0;this.I_pV=0};l([b],i);b.prototype.M=function(a){this.I_p2=a.I_p2;this.I_p1=a.I_p1;this.I_pV=a.I_pV;v(this,a)};b.prototype.copy_from=b.prototype.M;b.prototype.U=function(){var u;var w;var x;var y;var t;var l;var d;
|
||
|
var Stemmer = JSX.require("src/spanish-stemmer.jsx").SpanishStemmer;
|
||
|
"""
|
||
|
|
||
|
|
||
|
class SearchSpanish(SearchLanguage):
    """Spanish search language, bundling the JS Spanish stemmer.

    Stemming on the Python side is delegated to the ``snowballstemmer``
    Spanish stemmer created in :meth:`init`.
    """

    # Language code and display name.
    lang = 'es'
    language_name = 'Spanish'

    # File name of the raw JS stemmer and the inline JS source defined above.
    js_stemmer_rawcode = 'spanish-stemmer.js'
    js_stemmer_code = js_stemmer

    # Stop-word collection parsed at module import time.
    stopwords = spanish_stopwords

    def init(self, options: Dict) -> None:
        """Create the Snowball stemmer for Spanish.

        :param options: accepted for interface compatibility; not read here.
        """
        spanish = snowballstemmer.stemmer('spanish')
        self.stemmer = spanish

    def stem(self, word: str) -> str:
        """Return the stem of *word*, lowercasing it before stemming."""
        lowered = word.lower()
        return self.stemmer.stemWord(lowered)
|