Utente:BotSottile/user-fixes.py: differenze tra le versioni
Aspetto
Contenuto cancellato Contenuto aggiunto
Nessun oggetto della modifica |
striscie -> strisce |
||
Riga 76: | Riga 76: | ||
(u'\\b([sS])i(c|nattanto|n|ntanto)chè\\b', ur'\1i\2ché'), |
(u'\\b([sS])i(c|nattanto|n|ntanto)chè\\b', ur'\1i\2ché'), |
||
(u'\\b([sS])oprattuto\\b', ur'\1oprattutto'), |
(u'\\b([sS])oprattuto\\b', ur'\1oprattutto'), |
||
(u'\\b([sS])triscie\\b', ur'\1trisce'), |
|||
(u'\\b([sS])uperfice\\b', ur'\1uperficie'), |
(u'\\b([sS])uperfice\\b', ur'\1uperficie'), |
||
(u'\\b([tT])(al|almente|anto|ranne|utto)chè\\b', ur'\1\2ché'), |
(u'\\b([tT])(al|almente|anto|ranne|utto)chè\\b', ur'\1\2ché'), |
Versione attuale delle 14:21, 1 mar 2009
# -*- coding: utf-8 -*-
#
# Il codice può essere inserito in user-fixes.py (createlo se non esiste).
#
# Comando di esempio:
#
# replace.py -namespace:0 -xml:itwiki-20080418-pages-meta-current.xml -fix:errori_comuni
fixes['errori_comuni'] = {
'regex': True,
'recursive': True,
'msg': {
'it':u'Bot: Correzione di uno o più [[Utente:IagaBot/Errori_comuni|errori comuni]]'
},
'replacements': [
(u'(\\w)( | )(\\w)', ur'\1 \3'),
(u'([a-z]|[ìèéùòà\)]) ?\\.([A-Z]|È)', ur'\1. \2'),
(u'([a-z]|[ìèéùòà\)]) ?,([a-z]|")', ur'\1, \2'),
(u'([a-z]|[ìèéùòà\)]) ?(:|;)(\\w|")', ur'\1\2 \3'),
(u'(\\w|[ìèéùòà\)]) (,|\\.|:|;)', ur'\1\2'),
(u'(\\b|\\.)E\' ', ur'\1È '),
(u'\\b([Uu])n\'amico\\b', ur'\1n amico'),
(u'\\b(\\w+)zzion(\\w+)\\b', ur'\1zion\2'),
(u'\\b([aA])(bben|ccioc)chè\\b', ur'\1\2ché'),
(u'\\b([aA])(ffin|ncor|nzi|tteso)chè\\b', ur'\1\2ché'),
(u'\\b([aA])ccellera(re|zione)\\b', ur'\1ccelera\2'),
(u'\\b([aA])e?reoport(o|i)\\b', ur'\1eroport\2'),
(u'\\b([aA])ggiottaggio\\b', ur'\1ggiotaggio'),
(u'\\b([aA])l(cun|lor|tro)chè\\b', ur'\1l\2ché'),
(u'\\b([aA])ltretanto\\b', ur'\1ltrettanto'),
(u'\\b([aA])ppropiat(a|e|i|o)\\b', ur'\1ppropriat\2'),
(u'\\b([aA])pprovigionamento\\b', ur'\1pprovvigionamento'),
(u'\\b([aA])quistare\\b', ur'\1cquistare'),
(u'\\b([aA])vve(gna|gnadio|nga|ngadio)chè\\b', ur'\1vve\2ché'),
(u'\\b([bB])enchè\\b', ur'\1enché'),
(u'\\b([cC])(hec|ioc|omec|onciofosse|ontutto|osic|otal)chè\\b', ur'\1\2ché'),
(u'\\b([cC])osidett(o|i|e|a)\\b', ur'\1osiddett\2'),
(u'\\b([cC])ospiqu(o|i|e|a)\\b', ur'\1ospicu\2'),
(u'\\b([cC]om|)([pP])ropi(o|età|etari|etari[aeo])\\b', ur'\1\2ropri\3'),
(u'\\b([dD])(ac|appoi|imodo|opo|opodi)chè\\b', ur'\1\2ché'),
(u'\\b([dD]e|[cC]oef)ficen(za|te|ti)\\b', ur'\1ficien\2'),
(u'\\b([eE])ssendochè\\b', ur'\1ssendoché'),
(u'\\b([eE])vaqua(re|[t][oiae]|zione)\\b', ur'\1vacua\2'),
(u'\\b([fF])(inattanto|intanto|inac|inattanto|in|uor)chè\\b', ur'\1\2ché'),
(u'\\b([gG])(iac|ran|iafosse|iafossecosa)chè\\b', ur'\1\2ché'),
(u'\\b([iI])(nfinattanto|nquanto)chè\\b', ur'\1\2ché'),
(u'\\b([iI])gen(e|ic[oai]|iche)\\b', ur'\1gien\2'),
(u'\\b([iI])ngenier(e|i)\\b', ur'\1ngegner\2'),
(u'\\b([iI])nnoqu(i|o|a|e)\\b', ur'\1nnocu\2'),
(u'\\b([iI])nzio\\b', ur'\1nizio'),
(u'\\b([iI]l|)([lL])eggittim(o|i|e|a)\\b', ur'\1\2egittim\3'),
(u'\\b([iI]n|)([cC])oscen(za|te|ti)\\b', ur'\1\2oscien\3'),
(u'\\b([iI]n|)([sS])ufficen(za|te|ti)\\b', ur'\1\2ufficien\3'),
(u'\\b([lL])orchè\\b', ur'\1orché'),
(u'\\b([mM])acchè\\b', ur'\1acché'),
(u'\\b([mM])etereologi(a|co|ci|che)\\b', ur'\1eteorologi\2'),
(u'\\b([nN])on(so|)chè\\b', ur'\1on\2ché'),
(u'\\b([oO])(ltre|nde)chè\\b', ur'\1\2ché'),
(u'\\b([oO])nniscen(za|te|ti)\\b', ur'\1nniscien\2'),
(u'\\b([oO])noreficenza\\b', ur'\1norificenza'),
(u'\\b([oO])vverossia\\b', ur'\1vverosia'),
(u'\\b([pP])(oi|oscia|resso|ur)chè\\b', ur'\1\2ché'),
(u'\\b([pP])aralello\\b', ur'\1arallelo'),
(u'\\b([pP])er(cioc|lo|oc|)chè\\b', ur'\1er\2ché'),
(u'\\b([pP])iú\\b', ur'\1iù'),
(u'\\b([pP])rospicent(e|i)\\b', ur'\1rospicient\2'),
(u'\\b([pP])roveniendo\\b', ur'\1rovenendo'),
(u'\\b([pP]rofi|[pP]romis)qu(o|a|e|i)\\b', ur'\1cu\2'),
(u'\\b([qQ])ua(nto|si)chè\\b', ur'\1ua\2ché'),
(u'\\b([rR]i|)([cC])onoscien(za|te|ti)\\b', ur'\1\2onoscen\3'),
(u'\\b([sS])(econdo|ennon|enon|tante)chè\\b', ur'\1\2ché'),
(u'\\b([sS])cenz([ae])\\b', ur'\1cienz\2'),
(u'\\b([sS])enonch[èé]\b', ur'\1ennonch\2'),
(u'\\b([sS])i(c|nattanto|n|ntanto)chè\\b', ur'\1i\2ché'),
(u'\\b([sS])oprattuto\\b', ur'\1oprattutto'),
(u'\\b([sS])triscie\\b', ur'\1trisce'),
(u'\\b([sS])uperfice\\b', ur'\1uperficie'),
(u'\\b([tT])(al|almente|anto|ranne|utto)chè\\b', ur'\1\2ché'),
(u'\\b([tT])errittorio\\b', ur'\1erritorio'),
(u'<<', ur'«'),
(u'>>', ur'»'),
(u'[Kk]m[q2]', ur'km²')
],
'exceptions': {
'inside-tags': [
'hyperlink',
'link',
'comment',
'timeline',
'gallery',
'math',
'pre',
'startspace',
'source',
'nowiki'
] ,
'inside': [
r'(?s)<[^>]+>',
r'(?s)\{[^\}]+\}',
r'&[^;]+;',
r'\[([\w\W])+\]',
r'(a|d).C.',
r'\"([\w\W])+\"',
r'(?i)(s\.n\.c|s\.r\.l|s\.a\.s|s\.p\.a)',
r'[Mm](r|iss|ister)\.\w',
r'\.NET',
r'[Ss]t\.\w'
],
'text-contains': [
'IagaBot'
]
}
}
fixes['tutti'] = {
'regex': True,
'recursive': True,
'msg': {
'it':u'Bot: Correzione di uno o più [[Utente:IagaBot/Errori_comuni|errori comuni]]'
},
'replacements': [
(u'([a-z]|[ìèéùòà\)]) ?\\.([A-Z]|È)', ur'\1. \2'),
(u'([a-z]|[ìèéùòà\)]) ?,([a-z]|")', ur'\1, \2'),
(u'([a-z]|[ìèéùòà\)]) ?(:|;)(\\w|")', ur'\1\2 \3'),
(u'(\\w|[ìèéùòà\)]) (,|\\.|:|;)', ur'\1\2'),
(u'(\\b|\\.)E\' ', ur'\1È '),
(u'\\b([Uu])n\'amico\\b', ur'\1n amico'),
(u'\\b(\\w+)zzion(\\w+)\\b', ur'\1zion\2'),
(u'\\b([aA])(bben|ccioc)chè\\b', ur'\1\2ché'),
(u'\\b([aA])(ffin|ncor|nzi|tteso)chè\\b', ur'\1\2ché'),
(u'\\b([aA])ccellera(re|zione)\\b', ur'\1ccelera\2'),
(u'\\b([aA])e?reoport(o|i)\\b', ur'\1eroport\2'),
(u'\\b([aA])ggiottaggio\\b', ur'\1ggiotaggio'),
(u'\\b([aA])l(cun|lor|tro)chè\\b', ur'\1l\2ché'),
(u'\\b([aA])ltretanto\\b', ur'\1ltrettanto'),
(u'\\b([aA])ppropiat(a|e|i|o)\\b', ur'\1ppropriat\2'),
(u'\\b([aA])pprovigionamento\\b', ur'\1pprovvigionamento'),
(u'\\b([aA])quistare\\b', ur'\1cquistare'),
(u'\\b([aA])vve(gna|gnadio|nga|ngadio)chè\\b', ur'\1vve\2ché'),
(u'\\b([bB])enchè\\b', ur'\1enché'),
(u'\\b([cC])(hec|ioc|omec|onciofosse|ontutto|osic|otal)chè\\b', ur'\1\2ché'),
(u'\\b([cC])osidett(o|i|e|a)\\b', ur'\1osiddett\2'),
(u'\\b([cC])ospiqu(o|i|e|a)\\b', ur'\1ospicu\2'),
(u'\\b([cC]om|)([pP])ropi(o|età|etari|etari[aeo])\\b', ur'\1\2ropri\3'),
(u'\\b([dD])(ac|appoi|imodo|opo|opodi)chè\\b', ur'\1\2ché'),
(u'\\b([dD]e|[cC]oef)ficen(za|te|ti)\\b', ur'\1ficien\2'),
(u'\\b([eE])ssendochè\\b', ur'\1ssendoché'),
(u'\\b([eE])vaqua(re|[t][oiae]|zione)\\b', ur'\1vacua\2'),
(u'\\b([fF])(inattanto|intanto|inac|inattanto|in|uor)chè\\b', ur'\1\2ché'),
(u'\\b([gG])(iac|ran|iafosse|iafossecosa)chè\\b', ur'\1\2ché'),
(u'\\b([iI])(nfinattanto|nquanto)chè\\b', ur'\1\2ché'),
(u'\\b([iI])gen(e|ic[oai]|iche)\\b', ur'\1gien\2'),
(u'\\b([iI])ngenier(e|i)\\b', ur'\1ngegner\2'),
(u'\\b([iI])nnoqu(i|o|a|e)\\b', ur'\1nnocu\2'),
(u'\\b([iI])nzio\\b', ur'\1nizio'),
(u'\\b([iI]l|)([lL])eggittim(o|i|e|a)\\b', ur'\1\2egittim\3'),
(u'\\b([iI]n|)([cC])oscen(za|te|ti)\\b', ur'\1\2oscien\3'),
(u'\\b([iI]n|)([sS])ufficen(za|te|ti)\\b', ur'\1\2ufficien\3'),
(u'\\b([lL])orchè\\b', ur'\1orché'),
(u'\\b([mM])acchè\\b', ur'\1acché'),
(u'\\b([mM])etereologi(a|co|ci|che)\\b', ur'\1eteorologi\2'),
(u'\\b([nN])on(so|)chè\\b', ur'\1on\2ché'),
(u'\\b([oO])(ltre|nde)chè\\b', ur'\1\2ché'),
(u'\\b([oO])nniscen(za|te|ti)\\b', ur'\1nniscien\2'),
(u'\\b([oO])noreficenza\\b', ur'\1norificenza'),
(u'\\b([oO])vverossia\\b', ur'\1vverosia'),
(u'\\b([pP])(oi|oscia|resso|ur)chè\\b', ur'\1\2ché'),
(u'\\b([pP])aralello\\b', ur'\1arallelo'),
(u'\\b([pP])er(cioc|lo|oc|)chè\\b', ur'\1er\2ché'),
(u'\\b([pP])iú\\b', ur'\1iù'),
(u'\\b([pP])rospicent(e|i)\\b', ur'\1rospicient\2'),
(u'\\b([pP])roveniendo\\b', ur'\1rovenendo'),
(u'\\b([pP]rofi|[pP]romis)qu(o|a|e|i)\\b', ur'\1cu\2'),
(u'\\b([qQ])ua(nto|si)chè\\b', ur'\1ua\2ché'),
(u'\\b([rR]i|)([cC])onoscien(za|te|ti)\\b', ur'\1\2onoscen\3'),
(u'\\b([sS])(econdo|ennon|enon|tante)chè\\b', ur'\1\2ché'),
(u'\\b([sS])cenz([ae])\\b', ur'\1cienz\2'),
(u'\\b([sS])enonch[èé]\b', ur'\1ennonch\2'),
(u'\\b([sS])i(c|nattanto|n|ntanto)chè\\b', ur'\1i\2ché'),
(u'\\b([sS])oprattuto\\b', ur'\1oprattutto'),
(u'\\b([sS])uperfice\\b', ur'\1uperficie'),
(u'\\b([tT])(al|almente|anto|ranne|utto)chè\\b', ur'\1\2ché'),
(u'\\b([tT])errittorio\\b', ur'\1erritorio'),
(u'<<', ur'«'),
(u'>>', ur'»'),
(u'[Kk]m[q2]', ur'km²'),
##Da fixes.py
(r'(?i)<b>(.*?)</b>', r"'''\1'''"),
(r'(?i)<strong>(.*?)</strong>', r"'''\1'''"),
(r'(?i)<i>(.*?)</i>', r"''\1''"),
(r'(?i)<em>(.*?)</em>', r"''\1''"),
(r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2'),
(r'(?i)<hr ([^>/]+?)>', r'<hr \1 />'),
(r'(?i)([\r\n]) *<h1> *([^<]+?) *</h1> *([\r\n])', r"\1= \2 =\3"),
(r'(?i)([\r\n]) *<h2> *([^<]+?) *</h2> *([\r\n])', r"\1== \2 ==\3"),
(r'(?i)([\r\n]) *<h3> *([^<]+?) *</h3> *([\r\n])', r"\1=== \2 ===\3"),
(r'(?i)([\r\n]) *<h4> *([^<]+?) *</h4> *([\r\n])', r"\1==== \2 ====\3"),
(r'(?i)([\r\n]) *<h5> *([^<]+?) *</h5> *([\r\n])', r"\1===== \2 =====\3"),
(r'(?i)([\r\n]) *<h6> *([^<]+?) *</h6> *([\r\n])', r"\1====== \2 ======\3"),
# external link in double brackets
(r'\[\[(?P<url>https?://[^\]]+?)\]\]', r'[\g<url>]'),
# external link starting with double bracket
(r'\[\[(?P<url>https?://.+?)\]', r'[\g<url>]'),
# external link with forgotten closing bracket
#(r'\[(?P<url>https?://[^\]\s]+)\r\n', r'[\g<url>]\r\n'),
# external link ending with double bracket.
# do not change weblinks that contain wiki links inside
# inside the description
(r'\[(?P<url>https?://[^\[\]]+?)\]\](?!\])', r'[\g<url>]'),
# wiki link closed by single bracket.
# ATTENTION: There are some false positives, for example
# Brainfuck code examples or MS-DOS parameter instructions.
# There are also sometimes better ways to fix it than
# just putting an additional ] after the link.
(r'\[\[([^\[\]]+?)\](?!\])', r'[[\1]]'),
# wiki link opened by single bracket.
# ATTENTION: same as above.
(r'(?<!\[)\[([^\[\]]+?)\]\](?!\])', r'[[\1]]'),
# template closed by single bracket
# ATTENTION: There are some false positives, especially in
# mathematical context or program code.
(r'{{([^{}]+?)}(?!})', r'{{\1}}')
],
'exceptions': {
'inside-tags': [
'hyperlink',
'link',
'comment',
'timeline',
'gallery',
'math',
'pre',
'startspace',
'source',
'nowiki'
] ,
'inside': [
r'(?s)<[^>]+>',
r'(?s)\{[^\}]+\}',
r'&[^;]+;',
r'\[[\w\W]+\]',
r'(a|d).C.',
r'\"[\w\W]+\"',
r'(?i)(s\.n\.c|s\.r\.l|s\.a\.s|s\.p\.a)'
],
'text-contains': [
r'\[CDATA\[',
'IagaBot'
]
}
}
fixes['sostituzioni_standard'] = {
'regex': True,
'msg': {
'it':u'Bot: [[Utente:IagaBot/Sostituzioni_standard|sostituzioni standard]]'
},
'replacements': [
(u'\[\[[Ii]mage:(.*?)\]\]', ur'[[Immagine:\1]]'),
(u'\[\[[cC]ategory:(.*?)\]\]', ur'[[Categoria:\1]]'),
(u'== ?[vV]edi [aA]nche ?==', ur'== Voci correlate =='),
(u'== ?[lL]ink [eE]sterni ?==', ur'== Collegamenti esterni =='),
(u'== ?[vV]oci [Cc]orrelate ?==', ur'== Voci correlate =='),
(u'== ?[cC]ollegamenti [Ee]sterni ?==', ur'== Collegamenti esterni ==')
]
}