Ver código fonte

deleted: Chan241206/__pycache__/find_strokes.cpython-39.pyc
deleted: Chan241206/__pycache__/identify_fractals.cpython-39.pyc
deleted: Chan241206/__pycache__/kline_processor.cpython-39.pyc
deleted: Chan241206/__pycache__/stock_processor.cpython-39.pyc
deleted: Chan241206/__pycache__/validate_fractals.cpython-39.pyc
deleted: Chan241206/copy_db.py
deleted: Chan241206/find_strokes.py

Daniel 3 meses atrás
pai
commit
590d0f1365

BIN
.DS_Store


BIN
Chan241206/__pycache__/find_strokes.cpython-39.pyc


BIN
Chan241206/__pycache__/identify_fractals.cpython-39.pyc


BIN
Chan241206/__pycache__/kline_processor.cpython-39.pyc


BIN
Chan241206/__pycache__/stock_processor.cpython-39.pyc


BIN
Chan241206/__pycache__/validate_fractals.cpython-39.pyc


+ 0 - 83
Chan241206/copy_db.py

@@ -1,83 +0,0 @@
-import pymysql
-from tqdm import tqdm
-
# Shared connection settings; the two databases differ only in host.
# NOTE(review): credentials are hard-coded in plain text — consider loading
# them from environment variables or a secrets file.
_common_db_settings = {
    'port': 3307,
    'user': 'root',
    'password': 'r6kEwqWU9!v3',
    'database': 'qmt_stocks_whole',
}

# Remote (source) database configuration
remote_db_config = {'host': '192.168.31.111', **_common_db_settings}

# Local (target) database configuration
local_db_config = {'host': 'localhost', **_common_db_settings}
-
def fetch_tables(cursor):
    """Return the names of all tables visible in the current database."""
    cursor.execute("SHOW TABLES")
    rows = cursor.fetchall()
    return [row[0] for row in rows]
-
def fetch_table_schema(cursor, table_name):
    """Return the CREATE TABLE statement for *table_name*."""
    cursor.execute(f"SHOW CREATE TABLE `{table_name}`")
    row = cursor.fetchone()
    # SHOW CREATE TABLE yields (table_name, create_statement).
    return row[1]
-
def fetch_table_data(cursor, table_name):
    """Return every row of *table_name* plus the cursor description."""
    query = "SELECT * FROM `{}`".format(table_name)
    cursor.execute(query)
    rows = cursor.fetchall()
    return rows, cursor.description
-
def create_table(cursor, create_table_sql):
    """Execute a previously fetched CREATE TABLE statement on *cursor*."""
    cursor.execute(create_table_sql)
-
def insert_data(cursor, table_name, data, description):
    """Bulk-insert *data* into *table_name*.

    :param cursor: DB-API cursor on the target connection
    :param table_name: destination table (backtick-quoted in the SQL)
    :param data: sequence of row tuples, as returned by ``fetchall``
    :param description: ``cursor.description`` from the source query,
        used to recover column names and the placeholder count
    """
    # Backtick-quote each column so reserved-word column names (e.g.
    # `order`, `key`) cannot break the generated SQL — consistent with
    # how the table name is quoted throughout this module.
    columns = ", ".join(f"`{desc[0]}`" for desc in description)
    placeholders = ", ".join(["%s"] * len(description))
    insert_sql = f"INSERT INTO `{table_name}` ({columns}) VALUES ({placeholders})"
    cursor.executemany(insert_sql, data)
-
def drop_table_if_exists(cursor, table_name):
    """Drop *table_name* if present; a no-op when it does not exist."""
    sql = "DROP TABLE IF EXISTS `{}`".format(table_name)
    cursor.execute(sql)
-
def table_exists(cursor, table_name):
    """Return True if *table_name* exists in the current database.

    Uses a bound parameter instead of f-string interpolation so quotes in
    the name cannot break the statement (the previous version embedded the
    raw name inside single quotes).
    NOTE(review): ``LIKE`` still treats ``_`` and ``%`` as wildcards, so a
    name containing them may over-match — escape them if such table names
    ever occur.
    """
    cursor.execute("SHOW TABLES LIKE %s", (table_name,))
    return cursor.fetchone() is not None
-
def main():
    """Copy every table from the remote database into the local one.

    For each table the local copy is dropped (if present), re-created from
    the remote schema, and re-filled with the remote rows.  All inserts are
    committed in a single transaction at the end; connections are always
    closed via ``finally``.
    """
    # Connect to the source (remote) and target (local) databases.
    remote_conn = pymysql.connect(**remote_db_config)
    remote_cursor = remote_conn.cursor()
    local_conn = pymysql.connect(**local_db_config)
    local_cursor = local_conn.cursor()

    try:
        tables = fetch_tables(remote_cursor)
        for table in tqdm(tables, desc="Processing tables"):
            create_table_sql = fetch_table_schema(remote_cursor, table)
            # DROP TABLE IF EXISTS is already a no-op for missing tables,
            # so the previous table_exists() pre-check was redundant.
            drop_table_if_exists(local_cursor, table)
            create_table(local_cursor, create_table_sql)
            # NOTE(review): this loads the whole table into memory — fine
            # for small tables, consider chunked SELECTs otherwise.
            data, description = fetch_table_data(remote_cursor, table)
            insert_data(local_cursor, table, data, description)
        local_conn.commit()
    finally:
        remote_cursor.close()
        remote_conn.close()
        local_cursor.close()
        local_conn.close()

if __name__ == "__main__":
    main()

+ 0 - 54
Chan241206/find_strokes.py

@@ -1,54 +0,0 @@
def find_strokes(fractals, df, min_interval=5):
    """Filter raw fractals into a valid alternating sequence and build strokes.

    Filtering rules applied to the preliminary fractal list:
      1. Of consecutive fractals of the same type, keep only the more
         extreme one (higher top / lower bottom).
      2. An opposite-type fractal is accepted only when at least
         ``min_interval`` bars lie strictly between it and the previous
         valid fractal.
    Each adjacent pair of valid fractals then bounds one stroke.

    :param fractals: raw fractal list ``[(index, 'Top'), (index, 'Bottom')]``
    :param df: inclusion-free K-line DataFrame with ``High``/``Low`` columns
    :param min_interval: minimum bar count between a top and a bottom
    :return: ``(valid_fractals, strokes)`` — the filtered fractal list and a
        list of ``(start_index, end_index)`` pairs
    """
    if not fractals:
        return [], []

    # Process fractals in chronological (index) order.
    ordered = sorted(fractals, key=lambda item: item[0])
    valid = [ordered[0]]  # seed with the earliest fractal

    for idx, kind in ordered[1:]:
        prev_idx, prev_kind = valid[-1]
        if kind == prev_kind:
            # Same type twice in a row: keep whichever is more extreme.
            if kind == 'Top':
                more_extreme = df.loc[idx, 'High'] > df.loc[prev_idx, 'High']
            else:
                more_extreme = df.loc[idx, 'Low'] < df.loc[prev_idx, 'Low']
            if more_extreme:
                valid[-1] = (idx, kind)
            # otherwise drop the current fractal
        elif idx - prev_idx - 1 >= min_interval:
            # Alternating type with enough separation: accept it.
            valid.append((idx, kind))
        # alternating but too close: drop it

    # Adjacent valid fractals (guaranteed alternating) bound the strokes.
    strokes = [(valid[i][0], valid[i + 1][0]) for i in range(len(valid) - 1)]
    return valid, strokes

+ 0 - 15
Chan241206/identify_fractals.py

@@ -1,15 +0,0 @@
def identify_fractals(df):
    """Mark top and bottom fractals on an inclusion-free K-line DataFrame.

    A bar is a top when its High exceeds both neighbours' Highs and a
    bottom when its Low undercuts both neighbours' Lows.

    :param df: DataFrame with ``High`` and ``Low`` columns
    :return: list of ``(index, 'Top')`` / ``(index, 'Bottom')`` tuples
    """
    highs = df['High']
    lows = df['Low']
    marks = []
    # Only interior bars (i-1, i, i+1 all available) can form a fractal.
    for i in range(1, len(df) - 1):
        if highs[i] > highs[i - 1] and highs[i] > highs[i + 1]:
            marks.append((i, 'Top'))
        elif lows[i] < lows[i - 1] and lows[i] < lows[i + 1]:
            marks.append((i, 'Bottom'))
    return marks

+ 0 - 53
Chan241206/incremental_update.py

@@ -1,53 +0,0 @@
import pandas as pd

class IncrementalUpdater:
    """Incrementally maintains the inclusion-free K-line series held by a
    KlineProcessor as new bars arrive, instead of reprocessing the whole
    history."""

    def __init__(self, processor):
        """
        Initialize the incremental updater.
        :param processor: KlineProcessor instance whose ``df`` attribute
            holds the inclusion-processed K-line DataFrame
        """
        self.processor = processor

    def update_with_new_kline(self, new_kline):
        """
        Append one new K-line and re-resolve inclusion relationships
        backwards from the end of the series.

        ``new_kline`` is any record accepted by ``pd.DataFrame([new_kline])``
        providing at least 'High' and 'Low' — assumed, TODO confirm against
        callers.
        """
        new_kline = pd.DataFrame([new_kline])
        self.processor.df = pd.concat([self.processor.df, new_kline], ignore_index=True)

        i = len(self.processor.df) - 1
        while i > 0:
            # Bars i and i-1 are in an "inclusion" relationship when one
            # bar's High/Low range fully contains the other's.
            if (self.processor.df.loc[i, 'High'] <= self.processor.df.loc[i-1, 'High'] and
                self.processor.df.loc[i, 'Low'] >= self.processor.df.loc[i-1, 'Low']) or \
            (self.processor.df.loc[i, 'High'] >= self.processor.df.loc[i-1, 'High'] and
                self.processor.df.loc[i, 'Low'] <= self.processor.df.loc[i-1, 'Low']):
                
                # Walk back to the most recent earlier bar j that is NOT
                # itself included in its predecessor; it anchors the
                # direction decision below.
                j = i - 1
                while j > 0 and \
                    ((self.processor.df.loc[j, 'High'] <= self.processor.df.loc[j-1, 'High'] and
                        self.processor.df.loc[j, 'Low'] >= self.processor.df.loc[j-1, 'Low']) or \
                    (self.processor.df.loc[j, 'High'] >= self.processor.df.loc[j-1, 'High'] and
                        self.processor.df.loc[j, 'Low'] <= self.processor.df.loc[j-1, 'Low'])):
                    j -= 1

                # Decide merge direction from bar j vs bar i-1.
                # NOTE(review): the merge writes into bar j while the
                # inclusion was detected between bars i and i-1 — verify
                # against KlineProcessor.process_inclusion_globally, which
                # merges into i-1 instead.
                if self.processor.df.loc[j, 'High'] < self.processor.df.loc[i-1, 'High']:
                    # Upward direction: merged bar keeps the higher High/Low.
                    self.processor.df.loc[j, 'High'] = max(self.processor.df.loc[i, 'High'],
                                                        self.processor.df.loc[j, 'High'])
                    self.processor.df.loc[j, 'Low'] = max(self.processor.df.loc[i, 'Low'],
                                                        self.processor.df.loc[j, 'Low'])
                else:
                    # Downward direction: merged bar keeps the lower High/Low.
                    self.processor.df.loc[j, 'High'] = min(self.processor.df.loc[i, 'High'],
                                                        self.processor.df.loc[j, 'High'])
                    self.processor.df.loc[j, 'Low'] = min(self.processor.df.loc[i, 'Low'],
                                                        self.processor.df.loc[j, 'Low'])

                # Drop the absorbed bar and renumber the index.
                self.processor.df = self.processor.df.drop(index=i).reset_index(drop=True)
            else:
                break
            i -= 1

+ 0 - 55
Chan241206/kline_processor.py

@@ -1,55 +0,0 @@
import pandas as pd

class KlineProcessor:
    """Removes K-line "inclusion" relationships from a historical series.

    Two adjacent bars are in an inclusion relationship when one bar's
    High/Low range fully contains the other's; such pairs are merged until
    none remain.
    """

    def __init__(self, df):
        """
        Process the historical data's inclusion relationships up front.
        :param df: historical K-line DataFrame with 'High' and 'Low'
            columns; it is NOT modified — a processed copy is stored on
            ``self.df``.
        """
        self.df = self.process_inclusion_globally(df)

    def process_inclusion_globally(self, df):
        """
        Globally merge inclusion pairs, using the bar before the pair to
        decide the merge direction when it exists.

        :param df: DataFrame with 'High' and 'Low' columns
        :return: a new DataFrame with no inclusion relationships
        """
        # Work on a copy so the caller's DataFrame is not mutated in place
        # (the previous version edited it via df.loc[...] assignments).
        df = df.copy()
        i = 1
        while i < len(df):
            # Inclusion test between bars i and i-1.
            included = (
                (df.loc[i, 'High'] <= df.loc[i-1, 'High'] and df.loc[i, 'Low'] >= df.loc[i-1, 'Low'])
                or (df.loc[i, 'High'] >= df.loc[i-1, 'High'] and df.loc[i, 'Low'] <= df.loc[i-1, 'Low'])
            )
            if included:
                # Direction: compare i-1 against i-2 when i-2 exists,
                # otherwise compare the pair itself.  A non-rising High
                # means a downward ("LL") merge, else upward ("HH").
                if i - 2 >= 0:
                    downward = df.loc[i-1, 'High'] <= df.loc[i-2, 'High']
                else:
                    downward = df.loc[i, 'High'] <= df.loc[i-1, 'High']
                merge = min if downward else max
                df.loc[i-1, 'High'] = merge(df.loc[i, 'High'], df.loc[i-1, 'High'])
                df.loc[i-1, 'Low'] = merge(df.loc[i, 'Low'], df.loc[i-1, 'Low'])

                # Drop the absorbed bar and renumber.
                df = df.drop(index=i).reset_index(drop=True)

                # Step back one bar to re-check the merged result against
                # its new neighbour.
                i = max(i - 1, 1)
            else:
                i += 1
        # (Removed leftover debug print(df.head()) from the original.)
        return df

+ 0 - 133
Chan241206/main.py

@@ -1,133 +0,0 @@
-import pandas as pd
-from stock_processor import get_stock_data
-from kline_processor import KlineProcessor
-from validate_fractals import identify_fractals
-from find_strokes import find_strokes  # 导入 find_strokes
-import os
-from multiprocessing import Pool, cpu_count
-from functools import partial
-import pymysql
-
# Database connection settings.
# NOTE(review): credentials are hard-coded in plain text — consider loading
# them from environment variables.
db_config = dict(
    host='localhost',
    port=3307,
    user='root',
    password='r6kEwqWU9!v3',
    database='qmt_stocks_whole',
)
-
def get_stock_list(db_config):
    """
    Fetch the list of stock table names from the database.

    :param db_config: dict with host/port/user/password/database keys
    :return: list of table names; an empty list on any failure
    """
    try:
        connection = pymysql.connect(
            host=db_config['host'],
            port=db_config['port'],
            user=db_config['user'],
            password=db_config['password'],
            database=db_config['database']
        )
        # Close the connection and cursor even when the query raises —
        # the previous version leaked both on the error path.
        try:
            cursor = connection.cursor()
            try:
                # All stock tables live in the configured schema.
                cursor.execute(
                    "SELECT table_name FROM information_schema.tables WHERE table_schema = %s",
                    (db_config['database'],),
                )
                return [row[0] for row in cursor.fetchall()]
            finally:
                cursor.close()
        finally:
            connection.close()
    except Exception as e:
        print(f"获取股票列表失败:{e}")
        return []
-
def process_stock(table_name, db_config, output_dir):
    """
    Full pipeline for a single stock:
      1. load the raw data
      2. resolve inclusion relationships
      3. mark preliminary top/bottom fractals
      4. validate fractals and build strokes
      5. export the annotated result to CSV
    Errors are caught and reported; the function never raises.
    """
    try:
        # Step 1: raw K-line data.
        df = get_stock_data(table_name, db_config)
        if df.empty:
            print(f"{table_name} 数据为空,跳过。")
            return

        # Step 2: inclusion processing.
        cleaned_df = KlineProcessor(df).df

        # Step 3: preliminary fractals (no validity filtering yet).
        fractals = identify_fractals(cleaned_df)

        # Step 4: validity filtering plus stroke construction.
        valid_fractals, strokes = find_strokes(fractals, cleaned_df)

        # Step 5: export.
        export_to_csv(cleaned_df, valid_fractals, strokes, output_dir, table_name)
        print(f"{table_name} 处理完成。")
    except Exception as e:
        print(f"处理 {table_name} 时出错:{e}")
-
def export_to_csv(df, fractals, strokes, output_dir, table_name):
    """
    Export K-line data with fractal and stroke annotations to a CSV file.

    Only valid top/bottom fractals and stroke start/end points are marked.

    :param df: K-line DataFrame; NOT modified — a copy is annotated
    :param fractals: list of ``(index, 'Top'|'Bottom')`` tuples
    :param strokes: list of ``(start_index, end_index)`` tuples
    :param output_dir: directory for the exported file (created if missing)
    :param table_name: stock table name, used in the output file name
    """
    # Annotate a copy so the caller's DataFrame does not silently grow
    # Fractal/Stroke columns (the previous version mutated it in place).
    df = df.copy()

    # Fractal column: type name at each valid fractal's row.
    df['Fractal'] = ""
    for idx, fractal_type in fractals:
        df.at[idx, 'Fractal'] = fractal_type

    # Stroke endpoints in two independent columns.
    df['Stroke_Start'] = ""
    df['Stroke_End'] = ""
    # Strokes are numbered from 1; guard against indices past the end.
    for i, (start, end) in enumerate(strokes, 1):
        if start < len(df):
            df.at[start, 'Stroke_Start'] = f'Stroke{i}_Start'
        if end < len(df):
            df.at[end, 'Stroke_End'] = f'Stroke{i}_End'

    os.makedirs(output_dir, exist_ok=True)  # ensure the directory exists
    output_file = os.path.join(output_dir, f"{table_name}_result.csv")
    # utf-8-sig so Excel opens the Chinese text correctly.
    df.to_csv(output_file, index=False, encoding='utf-8-sig')
    print(f"数据已成功导出到 {output_file}")
-
def main():
    """Process the first 20 stock tables in parallel and export results."""
    output_dir = "./output"  # destination for the exported CSV files

    stock_list = get_stock_list(db_config)
    if not stock_list:
        print("未获取到任何股票表名,程序终止。")
        return

    # Restrict to the first 20 stocks for now.
    stock_list = stock_list[:20]
    print(f"共获取到 {len(stock_list)} 只股票,开始处理。")

    # Fan the per-stock pipeline out across all available cores.
    pool_size = cpu_count()
    print(f"使用 {pool_size} 个进程进行并行处理。")
    worker = partial(process_stock, db_config=db_config, output_dir=output_dir)
    with Pool(pool_size) as pool:
        pool.map(worker, stock_list)

    print("前20只股票数据处理完成。")

if __name__ == "__main__":
    main()

+ 0 - 48
Chan241206/stock_processor.py

@@ -1,48 +0,0 @@
-import pandas as pd
-import pymysql
-
def get_stock_data(table_name, db_config):
    """
    Load one stock's K-line data from the database, renamed and cleaned.

    :param table_name: source table name
    :param db_config: dict with host, port, user, password, database keys
    :return: cleaned DataFrame with Date/Open/High/Low/Close/Volume
        columns, or an empty DataFrame on error / no data
    """
    try:
        connection = pymysql.connect(
            host=db_config['host'],
            port=db_config['port'],
            user=db_config['user'],
            password=db_config['password'],
            database=db_config['database']
        )
        print(f"开始读取表:{table_name}")

        # Close the connection even when the query raises — the previous
        # version leaked it on the error path.
        try:
            # *_back columns are presumably the back-adjusted OHLCV series
            # — TODO confirm against the ingestion side.
            query = f"SELECT time, open_back, high_back, low_back, close_back, volume_back FROM `{table_name}`"
            df = pd.read_sql(query, connection)
        finally:
            connection.close()

        if df.empty:
            print(f"{table_name} 数据为空")
            return pd.DataFrame()

        # Normalize column names for the downstream processors.
        df.rename(columns={
            'time': 'Date',
            'high_back': 'High',
            'low_back': 'Low',
            'open_back': 'Open',
            'close_back': 'Close',
            'volume_back': 'Volume'
        }, inplace=True)

        df['Date'] = pd.to_datetime(df['Date'])
        print(f"完成数据清洗:{table_name}")
        # (Removed leftover debug print(df.head()) from the original.)
        return df

    except Exception as e:
        print(f"读取 {table_name} 数据失败:{e}")
        return pd.DataFrame()
+ 0 - 24
Chan241206/validate_fractals.py

@@ -1,24 +0,0 @@
def identify_fractals(df):
    """
    Identify preliminary top/bottom fractals by the basic 3-bar definition.

    No validity filtering (spacing, extremum reinforcement, alternation) is
    applied here — that happens later, in stroke construction.

    Definitions over a 3-bar window:
      - Top: the middle bar's High exceeds both neighbours' Highs.
      - Bottom: the middle bar's Low undercuts both neighbours' Lows.

    :param df: inclusion-free K-line DataFrame with 'High' and 'Low' columns
    :return: [(index, 'Top') | (index, 'Bottom')] in chronological order
    """
    found = []
    # Only interior bars have both neighbours available.
    for i in range(1, len(df) - 1):
        cur_high = df.loc[i, 'High']
        if cur_high > df.loc[i - 1, 'High'] and cur_high > df.loc[i + 1, 'High']:
            found.append((i, 'Top'))
            continue
        cur_low = df.loc[i, 'Low']
        if cur_low < df.loc[i - 1, 'Low'] and cur_low < df.loc[i + 1, 'Low']:
            found.append((i, 'Bottom'))
    return found

+ 1 - 0
Chan241215

@@ -0,0 +1 @@
+Subproject commit b5e3ef692b91d4b437e5ea13fbefe5c635d933f0