نقل منتدى XenForo إلى Discourse

ربما سيحتاجها شخص ما في المستقبل.

مستورد معدّل مني مع إصلاح المرفقات، وصور الرموز، والإعجابات (التفاعلات في XenForo الجديد)، ولا يزال الرسائل الخاصة معطلة (لأنني لا أحتاجها).

# frozen_string_literal: true

require "mysql2"

begin
  require "php_serialize" # https://github.com/jqr/php-serialize
rescue LoadError
  puts
  puts "لم يتم العثور على php_serialize."
  puts "أضفها إلى ملف Gemfile، مثل هذا: "
  puts
  puts "echo gem \\'php-serialize\\' >> Gemfile"
  puts "bundle install"
  exit
end

require File.expand_path(File.dirname(__FILE__) + "/base.rb")

# استدعِه بهذه الطريقة:
#   RAILS_ENV=production bundle exec ruby script/import_scripts/xenforo.rb
class ImportScripts::XenForo < ImportScripts::Base
  XENFORO_DB = "import_db"
  TABLE_PREFIX = "xf_"
  BATCH_SIZE = 1000
  ATTACHMENT_DIR = "/shared/attachments"
  AVATAR_DIR = "/shared/avatars"
  def initialize
    super
    @client =
      Mysql2::Client.new(
        host: "localhost",
        username: "root",
        password: "password",
        database: "import_db",
      )

    @category_mappings = {}
    @prefix_as_category = false
  end

  def execute
    import_users
    import_categories
    import_posts
    # import_private_messages
    import_likes
  end

  def import_avatar(id, imported_user)
    filename = File.join(AVATAR_DIR, "l", (id / 1000).to_s, "#{id}.jpg")
    return nil unless File.exist?(filename)
    upload = create_upload(imported_user.id, filename, "avatar_#{id}")
    return if !upload.persisted?
    imported_user.create_user_avatar
    imported_user.user_avatar.update(custom_upload_id: upload.id)
    imported_user.update(uploaded_avatar_id: upload.id)
  end

  def import_users
    puts "", "إنشاء المستخدمين"

    total_count =
      mysql_query(
        "SELECT count(*) count FROM #{TABLE_PREFIX}user WHERE user_state = 'valid' AND is_banned = 0;",
      ).first[
        "count"
      ]

    batches(BATCH_SIZE) do |offset|
      results =
        mysql_query(
          "SELECT user_id id, username, email, custom_title title, register_date created_at,
                last_activity last_visit_time, user_group_id, is_moderator, is_admin, is_staff
         FROM #{TABLE_PREFIX}user
         WHERE user_state = 'valid' AND is_banned = 0
         LIMIT #{BATCH_SIZE}
         OFFSET #{offset};",
        )

      break if results.size < 1

      next if all_records_exist? :users, results.map { |u| u["id"].to_i }

      create_users(results, total: total_count, offset: offset) do |user|
        next if user["username"].blank?
        {
          id: user["id"],
          email: user["email"],
          username: user["username"],
          title: user["title"],
          created_at: Time.zone.at(user["created_at"]),
          last_seen_at: Time.zone.at(user["last_visit_time"]),
          moderator: user["is_moderator"] == 1 || user["is_staff"] == 1,
          admin: user["is_admin"] == 1,
          post_create_action: proc { |u| import_avatar(user["id"], u) },
        }
      end
    end
  end

  def import_categories
    puts "", "جاري استيراد الفئات..."

    categories =
      mysql_query(
        "
        SELECT node_id id,
               title,
               description,
               parent_node_id,
               node_name,
               display_order
          FROM #{TABLE_PREFIX}node
      ORDER BY parent_node_id, display_order
      ",
      ).to_a

    top_level_categories = categories.select { |c| c["parent_node_id"] == 0 }

    create_categories(top_level_categories) do |c|
      {
        id: c["id"],
        name: c["title"],
        description: c["description"],
        position: c["display_order"],
        post_create_action:
          proc do |category|
            url = "board/#{c["node_name"]}"
            Permalink.find_or_create_by(url: url, category_id: category.id)
          end,
      }
    end

    top_level_category_ids = Set.new(top_level_categories.map { |c| c["id"] })

    subcategories = categories.select { |c| top_level_category_ids.include?(c["parent_node_id"]) }

    create_categories(subcategories) do |c|
      {
        id: c["id"],
        name: c["title"],
        description: c["description"],
        position: c["display_order"],
        parent_category_id: category_id_from_imported_category_id(c["parent_node_id"]),
        post_create_action:
          proc do |category|
            url = "board/#{c["node_name"]}"
            Permalink.find_or_create_by(url: url, category_id: category.id)
          end,
      }
    end

    subcategory_ids = Set.new(subcategories.map { |c| c["id"] })

    # الفئات الأعمق تحتاج إلى أن تكون وسوم
    categories.each do |c|
      next if c["parent_node_id"] == 0
      next if top_level_category_ids.include?(c["id"])
      next if subcategory_ids.include?(c["id"])

      # ابحث عن فئة فرعية للمواضيع في هذه الفئة
      parent = c
      while !parent.nil? && !subcategory_ids.include?(parent["id"])
        parent = categories.find { |subcat| subcat["id"] == parent["parent_node_id"] }
      end

      if parent
        tag_name = DiscourseTagging.clean_tag(c["title"])
        @category_mappings[c["id"]] = {
          category_id: category_id_from_imported_category_id(parent["id"]),
          tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name),
        }
      else
        puts "", "تعذر العثور على فئة لـ #{c["id"]} '#{c["title"]}'!"
      end
    end
  end

  # هذه الطريقة بديل لـ import_categories.
  # تستخدم اللواحق بدلاً من العقد.
  def import_categories_from_thread_prefixes
    puts "", "جاري استيراد الفئات..."

    categories =
      mysql_query(
        "
                              SELECT prefix_id id
                              FROM #{TABLE_PREFIX}thread_prefix
                              ORDER BY prefix_id ASC
                            ",
      ).to_a

    create_categories(categories) do |category|
      { id: category["id"], name: "Category-#{category["id"]}" }
    end

    @prefix_as_category = true
  end

  def import_likes
    puts "", "جاري استيراد الإعجابات"
    total_count =
      mysql_query(
        "SELECT COUNT(*) AS count FROM #{TABLE_PREFIX}reaction_content WHERE content_type = 'post'",
      ).first[
        "count"
      ]
    batches(BATCH_SIZE) do |offset|
      results =
        mysql_query(
          "SELECT reaction_content_id, content_id, reaction_user_id, reaction_date
         FROM #{TABLE_PREFIX}reaction_content
         WHERE content_type = 'post'
         ORDER BY reaction_content_id
         LIMIT #{BATCH_SIZE}
         OFFSET #{offset};",
        )
      break if results.size < 1
      create_likes(results, total: total_count, offset: offset) do |row|
        {
          post_id: row["content_id"],
          user_id: row["reaction_user_id"],
          created_at: Time.zone.at(row["reaction_date"]),
        }
      end
    end
  end

  def import_posts
    puts "", "إنشاء المواضيع والمشاركات"

    total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}post").first["count"]

    posts_sql =
      "
        SELECT p.post_id id,
               t.thread_id topic_id,
               #{@prefix_as_category ? "t.prefix_id" : "t.node_id"} category_id,
               t.title title,
               t.first_post_id first_post_id,
               t.view_count,
               p.user_id user_id,
               p.message raw,
               p.post_date created_at
        FROM #{TABLE_PREFIX}post p,
             #{TABLE_PREFIX}thread t
        WHERE p.thread_id = t.thread_id
        AND p.message_state = 'visible'
        AND t.discussion_state = 'visible'
        ORDER BY p.post_date
        LIMIT #{BATCH_SIZE}" # يحتاج إلى OFFSET

    batches(BATCH_SIZE) do |offset|
      results = mysql_query("#{posts_sql} OFFSET #{offset};").to_a

      break if results.size < 1
      next if all_records_exist? :posts, results.map { |p| p["id"] }

      create_posts(results, total: total_count, offset: offset) do |m|
        skip = false
        mapped = {}

        mapped[:id] = m["id"]
        mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
        mapped[:raw] = process_xenforo_post(m["raw"], m["id"])
        mapped[:created_at] = Time.zone.at(m["created_at"])

        if m["id"] == m["first_post_id"]
          if m["category_id"].to_i == 0 || m["category_id"].nil?
            mapped[:category] = SiteSetting.uncategorized_category_id
          else
            mapped[:category] = category_id_from_imported_category_id(m["category_id"].to_i) ||
              @category_mappings[m["category_id"]].try(:[], :category_id)
          end
          mapped[:title] = CGI.unescapeHTML(m["title"])
          mapped[:views] = m["view_count"]
          mapped[:post_create_action] = proc do |pp|
            Permalink.find_or_create_by(url: "threads/#{m["topic_id"]}", topic_id: pp.topic_id)
          end
        else
          parent = topic_lookup_from_imported_post_id(m["first_post_id"])
          if parent
            mapped[:topic_id] = parent[:topic_id]
          else
            puts "المشاركة الأصلية #{m["first_post_id"]} غير موجودة. تخطي #{m["id"]}: #{m["title"][0..40]}"
            skip = true
          end
        end

        skip ? nil : mapped
      end
    end

    # تطبيق الوسوم
    batches(BATCH_SIZE) do |offset|
      results = mysql_query("#{posts_sql} OFFSET #{offset};").to_a
      break if results.size < 1

      results.each do |m|
        next unless m["id"] == m["first_post_id"] && m["category_id"].to_i > 0
        next unless tag = @category_mappings[m["category_id"]].try(:[], :tag)
        next unless topic_mapping = topic_lookup_from_imported_post_id(m["id"])

        topic = Topic.find_by_id(topic_mapping[:topic_id])

        topic.tags = [tag] if topic
      end
    end
  end

  def import_private_messages
    puts "", "جاري استيراد الرسائل الخاصة..."
    post_count = mysql_query("SELECT COUNT(*) count FROM xf_conversation_message").first["count"]
    batches(BATCH_SIZE) do |offset|
      posts = mysql_query <<-SQL
        SELECT c.conversation_id, c.recipients, c.title, m.message, m.user_id, m.message_date, m.message_id, IF(c.first_message_id != m.message_id, c.first_message_id, 0) as topic_id
        FROM xf_conversation_master c
        LEFT JOIN xf_conversation_message m ON m.conversation_id = c.conversation_id
        ORDER BY c.conversation_id, m.message_id
        LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
      SQL
      break if posts.size < 1
      next if all_records_exist? :posts, posts.map { |post| "pm_#{post["message_id"]}" }
      create_posts(posts, total: post_count, offset: offset) do |post|
        user_id = user_id_from_imported_user_id(post["user_id"]) || Discourse::SYSTEM_USER_ID
        title = post["title"]
        message_id = "pm_#{post["message_id"]}"
        raw = process_xenforo_post(post["message"], 0)
        if raw.present?
          msg = {
            id: message_id,
            user_id: user_id,
            raw: raw,
            created_at: Time.zone.at(post["message_date"].to_i),
            import_mode: true,
          }
          if post["topic_id"] <= 0
            topic_id = post["topic_id"]
            if t = topic_lookup_from_imported_post_id("pm_#{topic_id}")
              msg[:topic_id] = t[:topic_id]
            else
              puts "لم يتم العثور على معرف الموضوع #{topic_id}، تخطي المشاركة #{post["message_id"]} من #{post["user_id"]}"
              next
            end
          else
            msg[:title] = post["title"]
            msg[:archetype] = Archetype.private_message
            to_user_array = PHP.unserialize(post["recipients"])
             if to_user_array.size > 0
             discourse_user_ids = to_user_array.keys.map { |id| user_id_from_imported_user_id(id) }
             usernames = User.where(id: [discourse_user_ids]).pluck(:username)
             msg[:target_usernames] = usernames.join(",")
	     
           end
          end
          msg
        else
          puts "رسالة فارغة، تخطي المشاركة #{post["message_id"]}"
          next
        end
      end
    end
  end

  def process_xenforo_post(raw, import_id)
    s = raw.dup

    # :) مشفرة كـ <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
    s.gsub!(%r{<!-- s(\S+) --><img (?:[^>]+) /><!-- s(?:\S+) -->}, '\1')

    # بعض الروابط تبدو هكذا: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
    s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')

    # العديد من وسوم BBCode الخاصة بـ phpBB لها تجزئة ملحقة بها. أمثلة:
    #   [url=https&#58;//google&#46;com:1qh1i7ky]انقر هنا[/url:1qh1i7ky]
    #   [quote=&quot;cybereality&quot;:b0wtlzex]بعض النص.[/quote:b0wtlzex]
    s.gsub!(/:(?:\w{8})\]/, "]")

    # إزالة وسوم الفيديو الخاصة بـ mybb.
    s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")

    s = CGI.unescapeHTML(s)

    # phpBB يقصر نص الرابط بهذه الطريقة، مما يكسر معالجة الماركداون لدينا:
    #   [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
    #
    # إصلاح للخطأ: xenforo.rb: 160: في `gsub!': تسلسل بايت غير صالح في UTF-8 (ArgumentError)
    s = s.encode("UTF-16be", invalid: :replace, replace: "?").encode("UTF-8") if !s.valid_encoding?

    # العمل حولها مؤقتًا:
    s.gsub!(%r{\[http(s)?://(www\.)?}, "[")

    # [QUOTE]...[/QUOTE]
    s.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n> #{$1}\n" }

    # اقتباسات متداخلة
    s.gsub!(%r{(\[/?QUOTE.*?\])}mi) { |q| "\n#{q}\n" }

    # [QUOTE="username, post: 28662, member: 1283"]
    s.gsub!(/\[quote="(\w+), post: (\d*), member: (\d*)"\]/i) do
      username, imported_post_id, _imported_user_id = $1, $2, $3

      topic_mapping = topic_lookup_from_imported_post_id(imported_post_id)

      if topic_mapping
        "\n[quote=\"#{username}, post:#{topic_mapping[:post_number]}, topic:#{topic_mapping[:topic_id]}\"]\n"
      else
        "\n[quote=\"#{username}\"]\n"
      end
    end

    # [URL=...]...[/URL]
    s.gsub!(%r{\[url="?(.+?)"?\](.+?)\[/url\]}i) { "[#{$2}](#{$1})" }

    # [URL]...[/URL]
    s.gsub!(%r{\[url\](.+?)\[/url\]}i) { " #{$1} " }

    # [IMG]...[/IMG]
    s.gsub!(%r{\[/?img\]}i, "")

    # تحويل وسوم القائمة إلى ul ووسوم list=1 إلى ol
    # (بشكل أساسي، نحن نفتقد list=a فقط هنا...)
    s.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]')
    s.gsub!(%r{\[list=1\](.*?)\[/list\]}im, '[ol]\1[/ol]')
    s.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]')
    s.gsub!(%r{\[list=1\](.*?)\[/list:o\]}im, '[ol]\1[/ol]')

    # تحويل وسوم *- إلى وسوم li حتى تتمكن bbcode-to-md من عمل سحرها على قوائم phpBB:
    s.gsub!(/\[\*\]\n/, "")
    s.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]')
    s.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')
    s.gsub!(/\[\*=1\]/, "")

    # [YOUTUBE]<id>[/YOUTUBE]
    s.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # [youtube=425,350]id[/youtube]
    s.gsub!(%r{\[youtube="?(.+?)"?\](.+?)\[/youtube\]}i) do
      "\nhttps://www.youtube.com/watch?v=#{$2}\n"
    end

    # [MEDIA=youtube]id[/MEDIA]
    s.gsub!(%r{\[MEDIA=youtube\](.+?)\[/MEDIA\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # [ame="youtube_link"]title[/ame]
    s.gsub!(%r{\[ame="?(.+?)"?\](.+?)\[/ame\]}i) { "\n#{$1}\n" }

    # [VIDEO=youtube;<id>]...[/VIDEO]
    s.gsub!(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) do
      "\nhttps://www.youtube.com/watch?v=#{$1}\n"
    end

    # [USER=706]@username[/USER]
    s.gsub!(%r{\[user="?(.+?)"?\](.+?)\[/user\]}i) { $2 }

    # إزالة وسم اللون
    s.gsub!(/\[color=[#a-z0-9]+\]/i, "")
    s.gsub!(%r{\[/color\]}i, "")

    # [HR]...[/HR]
    s.gsub!(%r{\[/?hr\]}i, "\n")

    # [CENTER]...[/CENTER]
    s.gsub!(%r{\[/?CENTER\]}i, "")

    # [HEADING=1]id[/HEADING]
    s.gsub!(%r{\[HEADING=1\](.+?)\[/HEADING\]}i) { "#{$1}\n" } 


    if Dir.exist? ATTACHMENT_DIR
      s = process_xf_attachments(:gallery, s, import_id)
      s = process_xf_attachments(:attachment, s, import_id)
    end

    s
  end

  def process_xf_attachments(xf_type, s, import_id)
    ids = Set.new
    ids.merge(s.scan(get_xf_regexp(xf_type)).map { |x| x[0].to_i })

    # ليس كل المرفقات تحتوي على وسم [ATTACH=] لذا نحتاج إلى الحصول على المعرفات الأخرى من جدول xf_attachment
    if xf_type == :attachment && import_id > 0
      sql =
        "SELECT attachment_id FROM #{TABLE_PREFIX}attachment WHERE content_id=#{import_id} and content_type='post';"
      ids.merge(mysql_query(sql).to_a.map { |v| v["attachment_id"].to_i })
    end

    ids.each do |id|
      next unless id
      sql = get_xf_sql(xf_type, id).dup.squish!
      results = mysql_query(sql)
      if results.size < 1
        # إزالة المرفق
        s.gsub!(get_xf_regexp(xf_type, id), "")
        STDERR.puts "#{xf_type.capitalize} id #{id} غير موجود في قاعدة البيانات المصدر. جاري الإزالة."
        next
      end
      original_filename = results.first["filename"]
      result = results.first
      upload =
        import_xf_attachment(
          result["data_id"],
          result["file_hash"],
          result["user_id"],
          original_filename,
        )
      if upload && upload.present? && upload.persisted?
        html = @uploader.html_for_upload(upload, original_filename)
        s = s + "\n\n#{html}\n\n" unless s.gsub!(get_xf_regexp(xf_type, id), html)
      else
        STDERR.puts "تعذر معالجة الرفع: #{original_filename}. تخطي معرف المرفق #{id}"
      end
    end
    s
  end

  def import_xf_attachment(data_id, file_hash, owner_id, original_filename)
    current_filename = "#{data_id}-#{file_hash}.data"
    path = Pathname.new(ATTACHMENT_DIR + "/#{data_id / 1000}/#{current_filename}")
    new_path = path.dirname + original_filename
    upload = nil
    if File.exist? path
      FileUtils.cp path, new_path
      upload = @uploader.create_upload owner_id, new_path, original_filename
      FileUtils.rm new_path
    else
      STDERR.puts "تعذر العثور على الملف #{path}. تخطي معرف المرفق #{data_id}"
    end
    upload
  end

  def get_xf_regexp(type, id = nil)
    case type
    when :gallery
      Regexp.new(/\[GALLERY=media,\s#{id ? id : '(\d+)'}\].+?\]/i)
    when :attachment
      Regexp.new(%r{\[ATTACH(?>=\w+)?\]#{id ? id : '(\d+)'}\[/ATTACH\]}i)
    end
  end

  def get_xf_sql(type, id)
    case type
    when :gallery
      <<-SQL
        SELECT m.media_id, m.media_title, a.attachment_id, a.data_id, d.filename, d.file_hash, d.user_id
        FROM xengallery_media AS m
        INNER JOIN #{TABLE_PREFIX}attachment a ON (m.attachment_id = a.attachment_id AND a.content_type = 'xengallery_media')
        INNER JOIN #{TABLE_PREFIX}attachment_data d ON a.data_id = d.data_id
        WHERE media_id = #{id}
      SQL
    when :attachment
      <<-SQL
        SELECT a.attachment_id, a.data_id, d.filename, d.file_hash, d.user_id
        FROM #{TABLE_PREFIX}attachment AS a
        INNER JOIN #{TABLE_PREFIX}attachment_data d ON a.data_id = d.data_id
        WHERE attachment_id = #{id}
        AND content_type = 'post'
      SQL
    end
  end

  def mysql_query(sql)
    @client.query(sql, cache_rows: false)
  end
end

ImportScripts::XenForo.new.perform
4 إعجابات