#!/usr/bin/env python
#
# wikka2doku.py
# This script helps you import pages (with all history!!!) from Wikka Wiki
# to DokuWiki. You will still need to edit some of the generated pages.
#
# Tested MySQL dump version: 10.11
#
# Wed Jan 31 02:24:30 CET 2007
# - Eriol (@mornie.org)
#
# Thanks to MancaUSoft for bug hunting and tosky for contributing! ;)
|---|
| 13 | |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | import time |
|---|
| 17 | import re |
|---|
| 18 | import gzip |
|---|
| 19 | import os |
|---|
| 20 | import os.path |
|---|
| 21 | import getopt |
|---|
| 22 | import sys |
|---|
| 23 | |
|---|
# Old user name -> new user name. Filled from the file given with
# -u/--users and looked up by process_user(); empty means "no renames".
USER_LIST = dict()
|---|
| 25 | |
|---|
def w2t(s):
    ''' Convert a Wikka timestamp into an integer Unix timestamp.

    `s` must have the form 'YYYY-MM-DD HH:MM:SS'. It is interpreted as
    local time (time.mktime) and the result is truncated to whole seconds.
    '''
    return int(time.mktime(time.strptime(s, '%Y-%m-%d %H:%M:%S')))
|---|
| 30 | |
|---|
# Matches one `wikka_pages` INSERT row of the SQL dump. The seven groups
# are, in order: page id, page name, timestamp, page body, owner,
# user (or host name) and the one-letter "last revision" flag.
pages = re.compile(
    r'''INSERT\sINTO\s`wikka_pages`\sVALUES\s\( # INSERT INTO...
    (\d*),\'(\w*)\', # page id and page name
    \'(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})\', # date and time
    \'(.*?)\', # page
    \'(\w*)\', # owner
    \'([-\d\.\w]*)\', # user or hostname
    \'(\w)\', # last
    ''',
    flags=re.VERBOSE | re.IGNORECASE | re.DOTALL)
|---|
| 43 | |
|---|
# Matches one `wikka_users` INSERT row of the SQL dump and captures the
# first three columns: user name, password and e-mail address.
users = re.compile(
    r'''INSERT\sINTO\s`wikka_users`\sVALUES\s\( #INSERT INTO...
    \'(\w*)\', # user name
    \'(\w*)\', # password
    \'([\w*\.\w*]+\@[\w*\.\w*]+)\'
    ''',
    flags=re.VERBOSE | re.IGNORECASE | re.DOTALL)
|---|
| 53 | |
|---|
def get_users(fin):
    ''' Return the user names of all `wikka_users` rows found in `fin`.

    `fin` is the text of the SQL dump. Names are returned in the order in
    which the rows appear.
    '''
    # Every match is a (username, password, email) tuple; only the name
    # is of interest here.
    return [record[0] for record in users.findall(fin)]
|---|
| 61 | |
|---|
def trasftable(mo):
    ''' Turn a matched Wikka table action into DokuWiki table markup.

    `mo` is a regex match object whose first group holds the number of
    columns and whose second group holds the ';'-separated cells.

    The first `columns` cells form the heading row (^a^b^); the remaining
    cells are laid out row by row (|c|d|). Every line of the result ends
    with a newline.

    A final row with fewer than `columns` cells used to be dropped
    silently. It is now padded with blank cells, unless all of its cells
    are empty (for example the leftover of a trailing ';'), in which case
    it is still skipped.

    Raises ValueError when the columns group is not an integer.
    '''
    columns = int(mo.group(1))
    cells = mo.group(2).split(';')

    heading = cells[:columns]
    body = cells[columns:]

    lines = ['^' + '^'.join(heading) + '^']

    # With a non-positive column count there is no sensible row layout;
    # emit the heading line only.
    if columns > 0:
        for start in range(0, len(body), columns):
            row = body[start:start + columns]
            if len(row) < columns:
                if not any(cell.strip() for cell in row):
                    continue
                # Pad with a single space: DokuWiki treats a completely
                # empty cell as a column span, not as a blank cell.
                row = row + [' '] * (columns - len(row))
            lines.append('|' + '|'.join(row) + '|')

    return '\n'.join(lines) + '\n'
|---|
| 77 | |
|---|
# Patterns used by process_bodypage(), compiled once at import time rather
# than on every call.
_LINK_RE = re.compile(r'''\[\[([^\[\]]+?)\s([^\[\]]+?)\]\]''')
# Only a '-' that follows indentation at the very start of a line is a list
# bullet; without the anchor every " -" in running text was turned into " *".
_ULIST_RE = re.compile(r'''^(\t| +)~?-''', re.MULTILINE)
_AUTOLINK_RE = re.compile(r'''\b(?!\[\[) # Not starting with [[
    ([A-Z][a-z]*[A-Z]+[a-z]+[a-zA-Z]*) # CamelCase
    (?!\]\]|\|)\b # Not ending with ]] or |
    ''', re.VERBOSE)
_IMAGES_RE = re.compile(r'''img="(.*?)"''')
_MONOSPACED_RE = re.compile(r'''##(.*?)##''')
# DOTALL lets the code itself span several lines; the language tag in the
# parentheses must stay on the opening line.
_CODE_RE = re.compile(r'''%%\(([^)\n]*)\)(.*?)%%''', re.DOTALL)
_STRIKE_RE = re.compile(r'''\+\+(.*?)\+\+''')
_TABLE_RE = re.compile(r'''{{table[ ]+columns="(.*?)".*cells="(.*?)"}}''')


def process_bodypage(body):
    """Convert the body of a Wikka page, as found in the SQL dump, to
    DokuWiki syntax and return the converted text.

    Steps, in order:

    * escaped newlines and quotes from the dump are unescaped;
    * '~-' / '~~-' list items become '*' bullets indented by two / four
      spaces (DokuWiki requires two spaces per nesting level);
    * '~&', '~~&', '~~~&' become '>', '>>', '>>>';
    * doubled double-quotes are removed;
    * '[[target text]]' links become '[[target|text]]';
    * bare CamelCase words are wrapped in '[[...]]';
    * indented '-' bullets at the start of a line become '*';
    * img="..." becomes '{{...}}';
    * '##text##' becomes DokuWiki monospaced text;
    * '%%(lang)code%%' becomes '<code lang>code</code>', including blocks
      that span several lines;
    * '++text++' becomes '<del>text</del>';
    * '{{table columns=".." cells=".."}}' is rendered by trasftable().
    """
    body = body.replace('\\n', '\n')
    # Longest marker first, so '~~-' is not consumed by the '~-' rule.
    body = body.replace('~~-', '    *')
    body = body.replace('~-', '  *')
    body = body.replace('~~~&', '>>>')
    body = body.replace('~~&', '>>')
    body = body.replace('~&', '>')
    body = body.replace("\\'", "'")
    body = body.replace('\\"', '"')
    body = body.replace('""', '')

    # Explicit links are rewritten before the CamelCase pass so that their
    # targets are followed by '|' or ']]' and are not linked a second time.
    body = _LINK_RE.sub(r'[[\1|\2]]', body)
    body = _AUTOLINK_RE.sub(r'[[\1]]', body)
    body = _ULIST_RE.sub(r'\1*', body)
    body = _IMAGES_RE.sub(r'{{\1}}', body)
    body = _MONOSPACED_RE.sub(r"''\1''", body)
    body = _CODE_RE.sub(r'<code \1>\2</code>', body)
    body = _STRIKE_RE.sub(r'<del>\1</del>', body)
    body = _TABLE_RE.sub(trasftable, body)

    return body
|---|
| 110 | |
|---|
def process_user(user):
    ''' Return the DokuWiki name to use for the Wikka user `user`.

    A name listed in USER_LIST is replaced by its mapped value; any other
    name is simply lower-cased.
    '''
    try:
        return USER_LIST[user]
    except KeyError:
        return user.lower()
|---|
| 117 | |
|---|
def makepages(fin, base_directory='.'):
    ''' Write all page revisions found in the dump text `fin`.

    For every `wikka_pages` row matched by the `pages` regex:

    * the revision flagged 'Y' (latest) is written to
      <base_directory>/pages/<name>.txt;
    * every other revision is gzip-compressed into
      <base_directory>/attic/<name>.<timestamp>.txt.gz;
    * one tab-separated line (timestamp, '127.0.0.1', page name, user,
      operation) is appended to <base_directory>/changes.log, where
      operation is 'created' for the first revision seen of a page and
      empty otherwise.

    Page names are lower-cased and user names go through process_user().
    The 'pages' and 'attic' directories must already exist.
    '''
    # Fixing a problematic Owner name instead of make more complex
    # the pages regex
    fin = fin.replace('(Public)', 'Public')

    pages_dir = os.path.join(base_directory, 'pages')
    attic_dir = os.path.join(base_directory, 'attic')
    known_pages = set()

    # 'with' guarantees every file is closed even if a row fails half-way.
    with open(os.path.join(base_directory, 'changes.log'), 'a') as fmeta:
        for row in pages.findall(fin):
            _page_id, page_name, date, page_body, _owner, user, last = row
            page_name = page_name.lower()
            user = process_user(user)
            timestamp = str(w2t(date))

            if page_name in known_pages:
                meta_operation = ''
            else:
                meta_operation = 'created'
                known_pages.add(page_name)

            converted_body = process_bodypage(page_body)

            if last == 'Y':  # go in pages
                page_path = os.path.join(pages_dir, page_name + '.txt')
                with open(page_path, 'w') as fpage:
                    fpage.write(converted_body)
            else:
                # A gzip stream opened with 'wb' only accepts bytes on
                # Python 3; on Python 2 str already is bytes and is
                # written unchanged.
                data = converted_body
                if not isinstance(data, bytes):
                    data = data.encode('utf-8')
                attic_name = page_name + '.' + timestamp + '.txt.gz'
                with gzip.open(os.path.join(attic_dir, attic_name),
                               'wb') as fattic:
                    fattic.write(data)

            fmeta.write('\t'.join([timestamp, '127.0.0.1', page_name,
                                   user, meta_operation]) + '\n')
|---|
| 159 | |
|---|
def usage():
    ''' Print the command line help to standard output. '''
    use = '''
wikka2doku [-h] [-d directory] [-u users] wikkadump.sql
    Launch passing a sql dump of your wikka wiki database.

    -h:
        Show this help and exit.
    -d directory:
        Creates pages, history and metadata in the
        directory passed as parameter
    -u users:
        Change username depending on file passed as
        parameter.
        The format of this file must be:

        old_username<space>new_username
'''
    # Function-call form with a single argument behaves the same on
    # Python 2 and Python 3.
    print(use)
|---|
| 176 | |
|---|
def main(argv=None):
    ''' Command line entry point; returns the process exit status.

    `argv` is the argument list without the program name and defaults to
    sys.argv[1:]. Returns 0 on success (or after printing help on -h) and
    2 on a usage error, a missing dump file or a malformed users file.
    '''
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, args = getopt.getopt(argv, 'hd:u:',
                                   ['help', 'directory=', 'users='])
    except getopt.GetoptError:
        usage()
        return 2

    base_directory = '.'
    users_file = None

    # Options are examined before the positional argument so that a plain
    # "-h" shows the help and exits successfully.
    for o, x in opts:
        if o in ('-h', '--help'):
            usage()
            return 0
        elif o in ('-d', '--directory'):
            base_directory = x
        elif o in ('-u', '--users'):
            users_file = x

    if not args:
        usage()
        return 2
    if not os.path.isfile(args[0]):
        print('%s not found :(' % args[0])
        return 2

    # Read the user mapping before touching the file system, so that a bad
    # mapping file does not leave half-created output directories behind.
    if users_file:
        with open(users_file) as mapping:
            for line in mapping:
                fields = line.split()
                if not fields:
                    continue  # blank line
                if len(fields) != 2:
                    print('%s: malformed line %r' % (users_file, line))
                    return 2
                old_user, new_user = fields
                USER_LIST[old_user] = new_user

    for p in ('attic', 'pages'):
        d = os.path.join(base_directory, p)
        if not os.path.isdir(d):
            os.makedirs(d)

    with open(args[0], 'r') as dump:
        fline = dump.read()
    makepages(fline, base_directory)
    return 0


if __name__ == '__main__':
    sys.exit(main())
|---|