爬虫写入数据库

This commit is contained in:
zhushenwudi 2025-01-14 15:23:59 +08:00
parent 33e34c680e
commit da848c9128
4 changed files with 131 additions and 59 deletions

View File

@ -14,11 +14,14 @@ dependencies {
testImplementation 'org.jetbrains.kotlin:kotlin-test'
implementation('org.jetbrains.kotlinx:kotlinx-serialization-json:1.7.1')
implementation 'org.jsoup:jsoup:1.18.3'
implementation "org.ktorm:ktorm-core:4.1.1"
implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3'
implementation("mysql:mysql-connector-java:8.0.33")
}
test {
useJUnitPlatform()
}
kotlin {
jvmToolchain(21)
jvmToolchain(17)
}

View File

@ -1,3 +1,5 @@
import database.TCard
import kotlinx.coroutines.delay
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json
@ -6,10 +8,12 @@ import model.Card
import model.GameCh
import model.parseType
import org.jsoup.Jsoup
import org.ktorm.database.Database
import org.ktorm.dsl.*
import java.io.File
@ExperimentalSerializationApi
fun main(args: Array<String>) {
suspend fun main(args: Array<String>) {
val cardList = readJson<List<Card>>(fileName = "card.json")
cardList.forEach { card ->
card.illustId = "c_${card.illustId}.png"
@ -18,62 +22,100 @@ fun main(args: Array<String>) {
val gameList = readJson<List<GameCh>>(fileName = "gamerch.json")
val database = Database.connect(
url = "jdbc:mysql://localhost:3306/sif_card",
driver = "com.mysql.jdbc.Driver",
user = "root",
password = ""
)
cardList.forEach { card ->
gameList.find { gameCh -> gameCh.name.contains(card.name) }?.let { gameCh ->
println(gameCh.url)
val doc = Jsoup.connect(gameCh.url).get()
val elements = doc.select(".markup.mu")
val tables = elements.select(".mu__table")
tables.forEach { element ->
println(element)
println("------------")
database.useTransaction {
val count = database
.from(TCard)
.select()
.where { TCard.name eq card.name }
.totalRecordsInAllPages
if (count > 0) {
return@forEach
}
card.characterName = elements.select(".mu__index").select("p").text().substringBefore(")").substringAfter("")
gameList.find { gameCh -> gameCh.name.contains(card.name) }?.let { gameCh ->
println(gameCh.url)
val doc = Jsoup.connect(gameCh.url).get()
val elements = doc.select(".markup.mu")
tables[1].select(".mu__table--row1").apply {
card.rarity = select(".mu__table--col2").text()
card.maxLevel = select(".mu__table--col4").text()
}
tables[1].select(".mu__table--row2").apply {
select(".mu__table--col2").text().let {
card.type = parseType(it)
val tables = elements.select(".mu__table")
card.characterName = elements.select(".mu__index").select("p").text()
.substringBefore(")").substringAfter("")
tables[1].select(".mu__table--row1").apply {
card.rarity = select(".mu__table--col2").text()
card.maxLevel = select(".mu__table--col4").text()
}
tables[1].select(".mu__table--row2").apply {
select(".mu__table--col2").text().let {
card.type = parseType(it)
}
}
tables[1].select(".mu__table--row3").apply {
card.group = select(".mu__table--col2").text()
card.team = select(".mu__table--col4").text()
}
tables[1].select(".mu__table--row4").apply {
card.date = select(".mu__table--col2").text().substringBefore("")
}
tables[2].select(".mu__table--row1").apply {
card.maxHp = select(".mu__table--col2").text()
card.smile = select(".mu__table--col4").text()
}
tables[2].select(".mu__table--row2").apply {
card.pure = select(".mu__table--col2").text()
card.cool = select(".mu__table--col4").text()
}
tables[3].select(".mu__table--row1").apply {
card.centerSkillName = select(".mu__table--col1").text()
}
tables[3].select(".mu__table--row2").apply {
card.centerSkillInfo = select(".mu__table--col1").text()
}
tables[4].select(".mu__table--row1").apply {
card.specialSkillName = select(".mu__table--col1").text()
}
tables[4].select(".mu__table--row2").apply {
card.specialSkillInfo = select(".mu__table--col1").text()
}
}
tables[1].select(".mu__table--row3").apply {
card.group = select(".mu__table--col2").text()
card.team = select(".mu__table--col4").text()
}
tables[1].select(".mu__table--row4").apply {
card.date = select(".mu__table--col2").text().substringBefore("")
}
println(card)
tables[2].select(".mu__table--row1").apply {
card.maxHp = select(".mu__table--col2").text()
card.smile = select(".mu__table--col4").text()
}
tables[2].select(".mu__table--row2").apply {
card.pure = select(".mu__table--col2").text()
card.cool = select(".mu__table--col4").text()
}
tables[3].select(".mu__table--row1").apply {
card.centerSkillName = select(".mu__table--col1").text()
}
tables[3].select(".mu__table--row2").apply {
card.centerSkillInfo = select(".mu__table--col1").text()
}
tables[4].select(".mu__table--row1").apply {
card.specialSkillName = select(".mu__table--col1").text()
}
tables[4].select(".mu__table--row2").apply {
card.specialSkillInfo = select(".mu__table--col1").text()
database.insert(TCard) {
set(it.name, card.name)
set(it.characterId, card.characterId)
set(it.characterName, card.characterName)
set(it.type, card.type)
set(it.rarity, card.rarity)
set(it.maxLevel, card.maxLevel)
set(it.maxHp, card.maxHp)
set(it.smile, card.smile)
set(it.pure, card.pure)
set(it.cool, card.cool)
set(it.illustId, card.illustId)
set(it.evolveIllustId, card.evolveIllustId)
set(it.group, card.group)
set(it.team, card.team)
set(it.date, card.date)
set(it.centerSkillName, card.centerSkillName)
set(it.centerSkillInfo, card.centerSkillInfo)
set(it.specialSkillName, card.specialSkillName)
set(it.specialSkillInfo, card.specialSkillInfo)
}
}
println(card)
return
delay(1000)
}
}

View File

@ -0,0 +1,28 @@
package database
import org.ktorm.schema.Table
import org.ktorm.schema.int
import org.ktorm.schema.varchar
/**
 * Ktorm schema object describing the `t_card` table.
 *
 * Declared as `Table<Nothing>`, i.e. without an entity type parameter, so it is
 * meant to be used through the SQL DSL (`from` / `select` / `insert`) with these
 * column properties.
 *
 * Apart from [id] and [characterId], every column is declared as `varchar`,
 * including the numeric-looking stats ([maxLevel], [maxHp], [smile], [pure], [cool]).
 */
object TCard : Table<Nothing>("t_card") {
// Primary key column.
// NOTE(review): presumably AUTO_INCREMENT on the database side — confirm against the table DDL.
val id = int("id").primaryKey()
// Card name.
val name = varchar("name")
// Character id.
val characterId = int("character_id")
// Character name.
val characterName = varchar("character_name")
// Card attribute type, stored as text.
val type = varchar("type")
// Rarity, stored as text.
val rarity = varchar("rarity")
// Maximum level, stored as text.
val maxLevel = varchar("max_level")
// Maximum HP, stored as text.
val maxHp = varchar("max_hp")
// Smile stat value, stored as text.
val smile = varchar("smile")
// Pure stat value, stored as text.
val pure = varchar("pure")
// Cool stat value, stored as text.
val cool = varchar("cool")
// File name of the normal (base) illustration.
val illustId = varchar("illust_id")
// File name of the awakened (evolved) illustration.
val evolveIllustId = varchar("evolve_illust_id")
// Group the card belongs to.
// NOTE(review): `group` is an SQL keyword — verify the generated SQL quotes this identifier.
val group = varchar("group")
// Team / sub-unit.
val team = varchar("team")
// Date, stored as text rather than a date type.
// NOTE(review): presumably the card's release date — confirm.
val date = varchar("date")
// Center skill name and its description text.
val centerSkillName = varchar("center_skill_name")
val centerSkillInfo = varchar("center_skill_info")
// Special skill name and its description text.
val specialSkillName = varchar("special_skill_name")
val specialSkillInfo = varchar("special_skill_info")
}

View File

@ -2,13 +2,12 @@ package model
import kotlinx.serialization.Serializable
fun parseType(type: String): Int {
println("parseType: $type")
fun parseType(type: String): String {
return when (type) {
"スマイル" -> 1
"ピュア" -> 2
"クール" -> 3
else -> 1
"スマイル" -> "甜美"
"ピュア" -> "清纯"
"クール" -> "帅气"
else -> "甜美"
}
}
@ -18,13 +17,13 @@ data class Card (
val name: String, // 卡牌名称
val characterId: Int, // 角色id
var characterName: String? = "", // 角色名称
var type: Int? = 1, // 类型 1:甜美 2:清纯 3:帅气
var rarity: String? = "R", // 稀有性 1:UR 2:SR 3:R
var type: String? = "甜美", // 类型
var rarity: String? = "R", // 稀有性
var maxLevel: String? = "", // 最大等级
var maxHp: String? = "1", // 最大血量
var smile: String? = "1", // 甜美值
var cool: String? = "1", // 清纯值
var pure: String? = "1", // 帅气值
var pure: String? = "1", // 清纯值
var cool: String? = "1", // 帅气值
var illustId: String, // 通常图片文件名
var evolveIllustId: String, // 觉醒图片文件名
var group: String? = "", // 所属团组