Package zinnia :: Package management :: Package commands :: Module wp2zinnia
[hide private]

Source Code for Module zinnia.management.commands.wp2zinnia

  1  """WordPress to Zinnia command module""" 
  2  import sys 
  3  from datetime import datetime 
  4  from optparse import make_option 
  5  from xml.etree import ElementTree as ET 
  6   
  7  from django.utils.html import strip_tags 
  8  from django.db.utils import IntegrityError 
  9  from django.utils.encoding import smart_str 
 10  from django.contrib.auth.models import User 
 11  from django.contrib.sites.models import Site 
 12  from django.utils.text import truncate_words 
 13  from django.db.models.signals import post_save 
 14  from django.template.defaultfilters import slugify 
 15  from django.contrib.comments.models import Comment 
 16  from django.core.management.base import CommandError 
 17  from django.core.management.base import LabelCommand 
 18   
 19  from tagging.models import Tag 
 20   
 21  from zinnia import __version__ 
 22  from zinnia.models import Entry 
 23  from zinnia.models import Category 
 24  from zinnia.managers import DRAFT, HIDDEN, PUBLISHED 
 25   
 26   
class Command(LabelCommand):
    """Command object for importing a WordPress blog
    into Zinnia via a WordPress eXtended RSS (WXR) file."""
    help = 'Import a Wordpress blog into Zinnia.'
    label = 'WXR file'
    args = 'wordpress.xml'

    option_list = LabelCommand.option_list + (
        make_option('--noautoexcerpt', action='store_false',
                    dest='auto_excerpt', default=True,
                    help='Do NOT generate an excerpt if not present.'),
        make_option('--author', dest='author', default='',
                    help='All imported entries belong to specified author'),
    )

    # Namespace prefixes (ElementTree "{uri}tag" notation) of the elements
    # read from the WXR file.
    WP_NS = '{http://wordpress.org/export/1.0/}'
    DC_NS = '{http://purl.org/dc/elements/1.1/}'
    CONTENT_NS = '{http://purl.org/rss/1.0/modules/content/}'
    EXCERPT_NS = '{http://wordpress.org/export/1.0/excerpt/}'

    # Format of the dates stored in 'wp:post_date' and 'wp:comment_date'.
    DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

    # NOTE: evaluated once, when the class body is executed.
    SITE = Site.objects.get_current()

    # Maps a WordPress post status to the Zinnia status constant
    # used for the imported entry.
    REVERSE_STATUS = {'pending': DRAFT,
                      'draft': DRAFT,
                      'auto-draft': DRAFT,
                      'inherit': DRAFT,
                      'publish': PUBLISHED,
                      'future': PUBLISHED,
                      'trash': HIDDEN,
                      'private': PUBLISHED}

    def __init__(self):
        """Init the Command and add custom styles"""
        super(Command, self).__init__()
        self.style.TITLE = self.style.SQL_FIELD
        self.style.STEP = self.style.SQL_COLTYPE
        self.style.ITEM = self.style.HTTP_INFO
        # Disconnecting signals provided by Zinnia
        post_save.disconnect(
            sender=Entry,
            dispatch_uid='zinnia.entry.post_save.ping_directories')
        post_save.disconnect(
            sender=Entry,
            dispatch_uid='zinnia.entry.post_save.ping_external_urls')

    def write_out(self, message, verbosity_level=1):
        """Write the message on stdout and flush it, but only if
        the command verbosity is at least verbosity_level."""
        if self.verbosity and self.verbosity >= verbosity_level:
            sys.stdout.write(smart_str(message))
            sys.stdout.flush()

    def handle_label(self, wxr_file, **options):
        """Parse the WXR file and import, in this order, its authors,
        categories, tags and entries (with their comments).

        Raises CommandError if the '--author' option does not
        match the username of an existing User."""
        self.verbosity = int(options.get('verbosity', 1))
        self.auto_excerpt = options.get('auto_excerpt', True)
        self.default_author = options.get('author')
        if self.default_author:
            # Replace the username by the matching User instance
            try:
                self.default_author = User.objects.get(
                    username=self.default_author)
            except User.DoesNotExist:
                raise CommandError('Invalid username for default author')

        self.write_out(self.style.TITLE(
            'Starting migration from Wordpress to Zinnia %s:\n' %
            __version__))

        tree = ET.parse(wxr_file)

        self.authors = self.import_authors(tree)

        self.categories = self.import_categories(
            tree.findall('channel/%scategory' % self.WP_NS))

        self.import_tags(
            tree.findall('channel/%stag' % self.WP_NS))

        self.import_entries(tree.findall('channel/item'))

    def import_authors(self, tree):
        """Retrieve all the authors used in posts
        and convert them to new or existing users, and
        return the conversion as a dict mapping each
        WordPress author name to a User."""
        self.write_out(self.style.STEP('- Importing authors\n'))

        post_authors = set()
        for item in tree.findall('channel/item'):
            post_type = item.find('%spost_type' % self.WP_NS).text
            if post_type == 'post':
                post_authors.add(item.find('%screator' % self.DC_NS).text)

        self.write_out('%i authors found.\n' % len(post_authors))

        authors = {}
        for post_author in post_authors:
            if self.default_author:
                # '--author' was given: every post goes to that user
                authors[post_author] = self.default_author
            else:
                authors[post_author] = self.migrate_author(post_author)
        return authors

    def migrate_author(self, author_name):
        """Interactively ask how the WordPress author must be migrated,
        either by picking an existing user or by creating a new one,
        and return the resulting User."""
        select_action_text = (
            "The author '%s' need to be migrated to an User:\n"
            "1. Use an existing user ?\n"
            "2. Create a new user ?\n"
            "Please select a choice: " % author_name)
        while True:
            selection = raw_input(smart_str(select_action_text))
            # Exact match needed: a substring test against '12' would
            # also accept an empty answer (or '12') and fall through
            # to the user creation.
            if selection in ('1', '2'):
                break

        if selection == '1':
            users = User.objects.all()
            usernames = [user.username for user in users]
            select_user_text = (
                "1. Select your user, by typing one of theses usernames:\n"
                "[%s]\n"
                "Please select a choice: " % ', '.join(usernames))
            while True:
                user_selected = raw_input(smart_str(select_user_text))
                if user_selected in usernames:
                    break
            return users.get(username=user_selected)

        create_user_text = (
            "2. Please type the email of the '%s' user: " % author_name)
        author_mail = raw_input(smart_str(create_user_text))
        try:
            return User.objects.create_user(author_name, author_mail)
        except IntegrityError:
            # The username is already taken, reuse the existing user
            return User.objects.get(username=author_name)

    def import_categories(self, category_nodes):
        """Import all the categories from 'wp:category' nodes,
        because categories in 'item' nodes are not necessarily
        all the categories, and return them in a dict keyed by
        title for database optimizations."""
        self.write_out(self.style.STEP('- Importing categories\n'))

        categories = {}
        for category_node in category_nodes:
            title = category_node.find(
                '%scat_name' % self.WP_NS).text[:255]
            slug = category_node.find(
                '%scategory_nicename' % self.WP_NS).text[:255]
            try:
                parent = category_node.find(
                    '%scategory_parent' % self.WP_NS).text[:255]
            except TypeError:
                # Empty 'wp:category_parent' node: no parent category
                parent = None
            self.write_out('> %s... ' % title)
            category, _ = Category.objects.get_or_create(
                title=title, slug=slug, parent=categories.get(parent))
            categories[title] = category
            self.write_out(self.style.ITEM('OK\n'))
        return categories

    def import_tags(self, tag_nodes):
        """Import all the tags from 'wp:tag' nodes,
        because tags in 'item' nodes are not necessarily
        all the tags, then use only the nicename, because it's like
        a slug and the true tag name may be not valid for url usage."""
        self.write_out(self.style.STEP('- Importing tags\n'))
        for tag_node in tag_nodes:
            tag_name = tag_node.find('%stag_slug' % self.WP_NS).text[:50]
            self.write_out('> %s... ' % tag_name)
            Tag.objects.get_or_create(name=tag_name)
            self.write_out(self.style.ITEM('OK\n'))

    def get_entry_tags(self, categories):
        """Return a list of entry's tags,
        by using the nicename for url compatibility"""
        tags = []
        for category in categories:
            domain = category.attrib.get('domain', 'category')
            nicename = category.attrib.get('nicename')
            if domain == 'tag' and nicename:
                tags.append(nicename)
        return tags

    def get_entry_categories(self, category_nodes):
        """Return a list of entry's categories
        based on the imported categories"""
        return [self.categories[category_node.text]
                for category_node in category_nodes
                if category_node.attrib.get('domain') == 'category']

    def import_entry(self, title, content, item_node):
        """Import an entry from an 'item' node and return it.
        The image, the related entries and end_publication are
        not filled, and start_publication uses the same value
        as creation_date, taken from 'wp:post_date'."""
        wp_ns = self.WP_NS
        creation_date = datetime.strptime(
            item_node.find('%spost_date' % wp_ns).text, self.DATE_FORMAT)

        excerpt = item_node.find('%sencoded' % self.EXCERPT_NS).text
        if not excerpt:
            if self.auto_excerpt:
                excerpt = truncate_words(strip_tags(content), 50)
            else:
                excerpt = ''

        status = item_node.find('%sstatus' % wp_ns).text
        category_nodes = item_node.findall('category')

        # The slug is built from the title rather than from
        # 'wp:post_name', because the latter can be badly formatted.
        # 'post-<id>' is the fallback when the title gives an empty slug.
        slug = slugify(title)[:255] or 'post-%s' % item_node.find(
            '%spost_id' % wp_ns).text

        entry_dict = {
            'content': content,
            'excerpt': excerpt,
            'slug': slug,
            'tags': ', '.join(self.get_entry_tags(category_nodes)),
            'status': self.REVERSE_STATUS[status],
            'comment_enabled': item_node.find(
                '%scomment_status' % wp_ns).text == 'open',
            'pingback_enabled': item_node.find(
                '%sping_status' % wp_ns).text == 'open',
            'featured': item_node.find(
                '%sis_sticky' % wp_ns).text == '1',
            'password': item_node.find(
                '%spost_password' % wp_ns).text or '',
            'login_required': status == 'private',
            'creation_date': creation_date,
            'last_update': datetime.now(),
            'start_publication': creation_date}

        # An entry with the same title is reused instead of duplicated
        entry, _ = Entry.objects.get_or_create(title=title,
                                               defaults=entry_dict)

        entry.categories.add(*self.get_entry_categories(category_nodes))
        entry.authors.add(self.authors[item_node.find(
            '%screator' % self.DC_NS).text])
        entry.sites.add(self.SITE)

        return entry

    def import_entries(self, items):
        """Loop over the items and import the entries found,
        an entry needs to have 'post_type' set to 'post' and
        to have a content and a title."""
        self.write_out(self.style.STEP('- Importing entries\n'))

        for item_node in items:
            title = (item_node.find('title').text or '')[:255]
            post_type = item_node.find('%spost_type' % self.WP_NS).text
            content = item_node.find('%sencoded' % self.CONTENT_NS).text

            if post_type == 'post' and content and title:
                self.write_out('> %s... ' % title)
                entry = self.import_entry(title, content, item_node)
                self.write_out(self.style.ITEM('OK\n'))
                # No trailing '/' in the path: ElementTree >= 1.3 reads
                # 'tag/' as 'tag/*' and would return the children of
                # the comment nodes instead of the comment nodes.
                self.import_comments(entry, item_node.findall(
                    '%scomment' % self.WP_NS))
            else:
                self.write_out('> %s... ' % title, 2)
                self.write_out(
                    self.style.NOTICE('SKIPPED (not a post)\n'), 2)

    def import_comments(self, entry, comment_nodes):
        """Loop over the comment nodes and import them
        in django.contrib.comments, attached to the entry.
        Comments without content are skipped."""
        wp_ns = self.WP_NS
        for comment_node in comment_nodes:
            comment_type = comment_node.find(
                '%scomment_type' % wp_ns).text

            title = 'Comment #%s' % comment_node.find(
                '%scomment_id' % wp_ns).text
            self.write_out(' > %s... ' % title)

            content = comment_node.find(
                '%scomment_content' % wp_ns).text
            if not content:
                self.write_out(self.style.NOTICE('SKIPPED (unfilled)\n'))
                # Only skip this comment, the next ones must
                # still be imported.
                continue

            submit_date = datetime.strptime(
                comment_node.find('%scomment_date' % wp_ns).text,
                self.DATE_FORMAT)

            approvation = comment_node.find(
                '%scomment_approved' % wp_ns).text
            # Anything not approved ('1') is marked as removed,
            # and spam is additionally made non public.
            is_removed = approvation != '1'
            is_public = approvation != 'spam'

            comment_dict = {
                'content_object': entry,
                'site': self.SITE,
                # The author name can be empty, avoid slicing None
                'user_name': (comment_node.find(
                    '%scomment_author' % wp_ns).text or '')[:50],
                'user_email': comment_node.find(
                    '%scomment_author_email' % wp_ns).text or '',
                'user_url': comment_node.find(
                    '%scomment_author_url' % wp_ns).text or '',
                'comment': content,
                'submit_date': submit_date,
                'ip_address': comment_node.find(
                    '%scomment_author_IP' % wp_ns).text or '',
                'is_public': is_public,
                'is_removed': is_removed}
            comment = Comment(**comment_dict)
            comment.save()

            flags = []
            if approvation == 'spam':
                flags.append('spam')
            if comment_type == 'pingback':
                flags.append('pingback')
            if comment_type == 'trackback':
                flags.append('trackback')
            if flags:
                # Flags are set on behalf of the first author of the entry
                flagger = entry.authors.all()[0]
                for flag in flags:
                    comment.flags.create(user=flagger, flag=flag)

            self.write_out(self.style.ITEM('OK\n'))
319