ویکی‌واژه:ربات/ربات تمیزکاری

این کد را به صورت گسترده و بر روی گروه زیادی از لغات نرانید؛ این کار باعث به‌هم‌ریختن صفحات می‌شود.

فعالیت‌ها

این ربات متن سرواژه‌ها را بر پایهٔ شیوه‌نامهٔ ویکی‌واژه تصحیح می‌کند و همچنین از تمام پارامترهای پیش‌فرض pywikipedia استفاده می‌کند.

کد

#!/usr/bin/python
# -*- coding: utf-8 -*-
# BY: Z (User:ZxxZxxZ on fa.wikipedia)
# BY: رضا (User:reza1615 on fa.wikipedia)
# Distributed under the terms of the CC-BY-SA 3.0.

# Imports and module-level configuration/constants.
# The statements below had been fused onto a single line (which does not
# parse); they are restored one per line, with values unchanged.
import codecs
import os
import re

import catlib
import pagegenerators
import wikipedia
import wikipedia as pywikibot

# Put-throttle configuration: the configured value is set to 0 and the
# throttle's setDelay() is then called with its default arguments.
wikipedia.config.put_throttle = 0
wikipedia.put_throttle.setDelay()

# Site objects for the 'fa' and 'en' language codes.
faSite = wikipedia.getSite('fa')
enSite = wikipedia.getSite('en')

# Module-level scratch text (a single space).
txtTmp = ' '

# Character inventories used by the text-fixing helpers.
faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیًٌٍَُِّْٓيك'
enChrs = u'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM'
faNums = u'۰۱۲۳۴۵۶۷۸۹'
arNums = u'٠١٢٣٤٥٦٧٨٩'
enNums = u'0123456789'

# Edit-summary template; the single %s is filled in when a page is saved.
msg = u'ربات: اصلاح بر پایه شیوه‌نامه %s'

def BotRun(page,text_fa):
    """Return *text_fa* rewritten according to the entry style rules below.

    page    -- the page being processed; not used by the clean-up itself.
    text_fa -- the page's wikitext (unicode).

    The result is stripped of surrounding whitespace and always ends with
    exactly one newline.
    """
    #--------------------action that you want to do on pages--------------------

    # NOTE(review): as written this drops the closing ']]' of a link that is
    # followed by a Persian comma -- confirm against the on-wiki original.
    text_fa = text_fa.replace(u']]،', u'*\n')
    # TODO(review): the text this was recovered from continued with
    # `.replace(u'', u'\n[[')`.  An empty search pattern inserts the
    # replacement between every character of the page, so the call is left
    # out until the intended (lost) pattern is recovered.
    text_fa = text_fa.replace(u'\n\n\n', u'\n\n').replace(u'\n\n\n', u'\n\n')

    # Drop existing list markers; they are re-added per line below.
    text_fa = text_fa.replace(u'# ', u'')

    # Break the text after every ')' except where it is directly followed
    # by '=' (the '@@=' placeholder shields those occurrences).
    text_fa = text_fa.replace(u')=', u'@@=')
    text_fa = text_fa.replace(u')', u')\n')
    text_fa = text_fa.replace(u'@@=', u')=')

    text = u'\n' + u''.join(
        _fix_entry_line(raw_line) for raw_line in text_fa.split(u'\n'))

    text = _br_to_newline(text)
    text = (text.replace(u'#(', u'\n(')
                .replace(u'# ؛', u'#')
                .replace(u'#؛', u'#')
                .replace(u'#.', u'#')
                .replace(u'#،', u'#')
                .replace(u'#,', u'#'))

    text = text.replace(u'فرهنگ لغت معین', u'فرهنگ لغت معین\n')
    text = _br_to_newline(text)
    text = text.replace(u'\n\n\n', u'\n\n').replace(u'\n\n\n', u'\n\n')
    text = text.replace(u'# فرهنگ', u'* فرهنگ').replace(u'#فرهنگ', u'*فرهنگ')

    # Normalise section titles.
    text = (text.replace(u'=جُستارهای وابسته=', u'=جستارهای وابسته=')
                .replace(u'=همچنین ببینید=', u'=جستارهای وابسته='))
    text = text.replace(u'= منبع =', u'=منابع=').replace(u'= منبع‌ها =', u'=منابع=')
    text = (text.replace(u'=منابع=', u'=منابع=')
                .replace(u'=منبع‌ها=', u'=منابع=')
                .replace(u'=منبع ها=', u'=منابع=')
                .replace(u'=مراجع=', u'=منابع=')
                .replace(u'=منبع=', u'=منابع='))
    # Whatever heading level the sources section had (1-5), rewrite it as a
    # level-4 heading; '@@@@' is a temporary placeholder so that the shorter
    # patterns cannot re-match text that has already been handled.
    text = (text.replace(u'=====منابع=====', u'@@@@منابع@@@@')
                .replace(u'====منابع====', u'@@@@منابع@@@@')
                .replace(u'===منابع===', u'@@@@منابع@@@@')
                .replace(u'==منابع==', u'@@@@منابع@@@@')
                .replace(u'=منابع=', u'@@@@منابع@@@@'))
    text = text.replace(u'@@@@منابع@@@@', u'==== منابع ====')

    return text.strip() + u'\n'


def _fix_entry_line(line):
    """Return one wikitext line in cleaned form, terminated by a newline."""
    line = line.strip()
    # Lines that are empty, or that contain wiki markup characters or one of
    # these words (presumably dictionary/source names -- confirm), are kept
    # as they are instead of being turned into list items.
    keep_markers = (u'[', u'#', u'{', u'/', u'=', u'(', u')',
                    u'معین', u'عمید', u'دهخدا', u'لغت')
    if line == u'' or any(marker in line for marker in keep_markers):
        textline = line + u'\n'
    elif u'*' in line:
        textline = line.replace(u'*', u'#') + u'\n'
    else:
        # A '#' cannot occur here (such lines are kept above), so every
        # remaining line becomes a numbered-list item.
        textline = u'# ' + line + u'\n'

    # A line made only of list markers and whitespace becomes a blank line.
    if textline.replace(u'#', u'').strip() == u'':
        textline = u'\n'

    textline = textline.replace(u'#=', u'=').replace(u'#*', u'#')
    textline = textline.replace(u'#{', u'{')
    if u':' in line:
        textline = textline.replace(u'#[[', u'[[')
    return textline


def _br_to_newline(text):
    """Replace HTML line-break tags in *text* with newline characters."""
    # NOTE(review): the search strings were lost (each literal was split
    # across two physical lines, which is what a rendered <br> tag leaves
    # behind).  These four spellings are an assumption -- confirm against
    # the on-wiki original.
    for br_tag in (u'<br />', u'<br/>', u'<br>', u'</br>'):
        text = text.replace(br_tag, u'\n')
    return text

#----------------------------------------------------------end of action that you want to do on pages---------------

def faToEn(faTxt, type):
    """Replace ASCII digits and punctuation in *faTxt* with Persian ones.

    Despite the name, the conversion goes from the ASCII characters to
    their Persian counterparts (English -> Farsi).

    faTxt -- the text to convert.
    type  -- 'force' applies every replacement unconditionally; any other
             value (including 'safe', which has no rules yet) returns the
             text unchanged.

    Returns the converted text.
    """
    # The two strings are parallel: the character at index i of the second
    # is replaced by the character at index i of the first.
    faChrsToFrcRplc = u'۰۱۲۳۴۵۶۷۸۹«»٬٫٪،؛؟'
    enChrsToFrcRplc = u'0123456789""`.%,;?'

    enTxt = faTxt
    if type == 'force':
        # NOTE: '"' occurs twice in the source set; the first pass already
        # replaces every '"', so the second mapping never matches.
        for enChr, faChr in zip(enChrsToFrcRplc, faChrsToFrcRplc):
            # Accumulate on enTxt so that every replacement is kept.
            enTxt = enTxt.replace(enChr, faChr)
    return enTxt

def fixCsmtc(faTxt, type):
    """Apply cosmetic zero-width non-joiner (ZWNJ, U+200C) fixes to *faTxt*.

    faTxt -- the text to clean.
    type  -- 'force' applies the ZWNJ rules below; any other value
             (including 'safe', which has no rules yet) returns the text
             unchanged.

    Returns the cleaned text.
    """
    if type != 'force':
        return faTxt

    zwnj = u'\u200c'

    # Consecutive ZWNJs: collapse a run into a single one.
    txt = re.sub(zwnj + u'{2,}', zwnj, faTxt)

    # Piping: a link whose target ends in a ZWNJ and is directly followed by
    # Persian letters becomes a piped link that includes the suffix.
    # NOTE(review): the replacement had been reduced to '\1<ZWNJ>\2' (which
    # would remove the link brackets); the piped form is assumed from the
    # "Piping" label -- confirm against the on-wiki original.
    piping = u'\\[\\[([^\\]\\|]*?)' + zwnj + (u'\\]\\]([%s]+)' % faChrs)
    txt = re.sub(piping, u'[[\\1|\\1' + zwnj + u'\\2]]', txt)

    # After: drop a ZWNJ unless the next character is one of the listed
    # letters/marks or lies in U+0900-U+097F.
    txt = re.sub(
        zwnj + u'(?![ئاآأإژزرذدوؤةبپتثجچحخسشصضطظعغفقکگلمنهیيًٌٍَُِّْٰٓٔ]|[\u0900-\u097F]|ֹ)',
        u'', txt)

    # Before: drop a ZWNJ unless the previous character is one of the listed
    # letters/marks, lies in U+0900-U+097F, or is 'f'.
    txt = re.sub(
        u'(?<![ئبپتثجچحخسشصضطظعغفقکگلمنهیيًٌٍَُِّْٰٓٔ]|[\u0900-\u097F]|f|ֹ)' + zwnj,
        u'', txt)

    return txt

def run(generator):
    """Clean up every page yielded by *generator* and save the result.

    For each page the text is fetched, passed through BotRun() and written
    back with savepart().  Locked pages, missing pages and pages whose text
    cannot be fetched are skipped.  When a page is a redirect, the redirect
    target is processed and saved instead of the redirect itself.
    """
    for page in generator:
        try:
            if not page.canBeEdited():
                wikipedia.output( u'Skipping locked page %s' % page.title() )
                continue
            text_fa = page.get()  # fetch the page content
        except wikipedia.NoPage:
            wikipedia.output( u'Page %s not found' % page.title() )
            continue
        except wikipedia.IsRedirectPage:
            # Follow the redirect and work on its target.
            try:
                pageRedirect = page.getRedirectTarget()
                text_fa = pageRedirect.get()
            except Exception:
                # e.g. the target is missing or is itself a redirect
                wikipedia.output( u'Skipping %s: redirect target could not be read' % page.title() )
                continue
            wikipedia.output( u'Page %s was Redirect but edited!' %  pageRedirect )
            # Save onto the target; saving onto `page` would overwrite the
            # redirect with a copy of the target's text.
            page = pageRedirect
        except Exception:
            # Best effort: keep going with the next page, but do not swallow
            # KeyboardInterrupt/SystemExit the way a bare `except:` does.
            wikipedia.output( u'Skipping %s because of an unexpected error' % page.title() )
            continue

        new_text = BotRun(page, text_fa)
        savepart( page, new_text )  # save the page with the new_text content

def savepart( page,new_text):
    """Save *new_text* to *page* as a minor edit.

    The edit summary is the module-level ``msg`` template filled in with
    the page object.  Edit conflicts and spam-blacklist rejections are
    reported with wikipedia.output() and the page is skipped; other
    exceptions propagate to the caller.
    """
    try:
        page.put( new_text, msg % page, watchArticle = None, minorEdit = True )
    except wikipedia.EditConflict:
        wikipedia.output( u'Skipping %s because of edit conflict' % ( page.title() ) )
    except wikipedia.SpamfilterError as url:
        # `url` is the exception object itself; it is formatted into the
        # message below.
        wikipedia.output( u'Cannot change %s because of blacklist entry %s' % ( page.title(),url ) )

def main():
    """Build the page generator from the command line and run the bot.

    Arguments handled here:
      -page:TITLE, -cat:NAME, -template:NAME  add a page title to process
                                   (prompting when the value is omitted)
      -namespace:N / -ns:N         restrict the generator to namespace N
      -summary:TEXT                passed to wikipedia.setAction()
      -except:TEXT, -autotitle, -autotext
                                   parsed and stored, but not otherwise
                                   used in this function
    Any other argument is handed to pagegenerators.GeneratorFactory.
    """
    # Local import: sys.exit() is called below and the file's import
    # statement does not bring `sys` into scope.
    import sys

    summary_commandline,template,gen = None,None,None
    exceptions,PageTitles,namespaces = [],[],[]
    autoText,autoTitle = False,False
    genFactory = pagegenerators.GeneratorFactory()
    wikipedia.setAction( msg )
    # Set to False to be prompted for a single page title instead of
    # reading command-line arguments.
    use_args = True
    if use_args:
        for arg in wikipedia.handleArgs():
            if arg == '-autotitle':
                autoTitle = True
            elif arg == '-autotext':
                autoText = True
            elif arg.startswith( '-page:' ):
                if len(arg) == 6:
                    PageTitles.append(wikipedia.input( u'Which page do you want to chage?' ))
                else:
                    PageTitles.append(arg[6:])
            elif arg.startswith( '-cat:' ):
                if len(arg) == 5:
                    PageTitles.append(wikipedia.input( u'Which Category do you want to chage?' ))
                else:
                    PageTitles.append('Category:'+arg[5:])
            elif arg.startswith( '-template:' ):
                if len(arg) == 10:
                    PageTitles.append(wikipedia.input( u'Which Template do you want to chage?' ))
                else:
                    PageTitles.append('Template:'+arg[10:])
            elif arg.startswith('-except:'):
                exceptions.append(arg[8:])
            elif arg.startswith( '-namespace:' ):
                namespaces.append( int( arg[11:] ) )
            elif arg.startswith( '-ns:' ):
                namespaces.append( int( arg[4:] ) )
            elif arg.startswith( '-summary:' ):
                wikipedia.setAction( arg[9:] )
                summary_commandline = True
            else:
                generator = genFactory.handleArg(arg)
                if generator:
                    gen = generator
    else:
        PageTitles = [raw_input(u'Page:> ').decode('utf-8')]

    # Explicit page titles take precedence over any factory generator.
    if PageTitles:
        pages = [wikipedia.Page(faSite,PageTitle) for PageTitle in PageTitles]
        gen = iter( pages )
    if not gen:
        # Nothing to work on.
        wikipedia.stopme()
        sys.exit()
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator( gen,namespaces )
    # pageNumber: number of pages to load at the same time
    preloadingGen = pagegenerators.PreloadingGenerator( gen,pageNumber = 60 )
    run(preloadingGen)

# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":

   try:
       main()
   finally:
       # Called whether main() returns normally, raises, or exits.
       wikipedia.stopme()