# main.py - trains an LSTM on the 脐橙 price/index history and prints an 18-day forecast.

  1. import os
  2. import numpy as np
  3. import pandas as pd
  4. import torch
  5. import torch.nn as nn
  6. from torch.utils.data import TensorDataset, DataLoader
  7. from sklearn.preprocessing import MinMaxScaler
  8. import matplotlib.pyplot as plt
  9. # 获取当前文件的绝对路径并构建csv文件路径
  10. # 获取 main.py 所在的目录
  11. current_dir = os.path.dirname(os.path.abspath(__file__))
  12. # 构建CSV文件的相对路径(相对于main.py)
  13. csv_relative_path = os.path.join('..', '..', 'huinongbao-app', 'src', 'assets', '慧农宝_final.csv')
  14. # 将当前目录与CSV文件的相对路径组合成完整路径,并规范化它
  15. full_name = os.path.normpath(os.path.join(current_dir, csv_relative_path))
  16. print(f"Full path to CSV: {full_name}")
  17. # 读取CSV文件并确保日期列是datetime类型,并设置为索引
  18. df = pd.read_csv(full_name, sep=',', encoding='utf-8', engine='python')
  19. # 检查数据是否存在,确保顶部数据是2024-12-18,脐橙,8.5,101.74
  20. expected_top_row = ['2024-12-18', '脐橙', '8.5', '101.74']
  21. if not df.empty and df.iloc[0].astype(str).tolist() != expected_top_row:
  22. # 如果顶部数据不匹配,输出提示并退出
  23. print("当前CSV文件的顶部数据不符合预期。请检查文件。")
  24. exit()
  25. # 提示用户输入数据
  26. num_entries = int(input("请输入要添加的条目数量: "))
  27. user_data = {
  28. "数值": [],
  29. "指数": []
  30. }
  31. for i in range(num_entries):
  32. value = input(f"请输入第 {i + 1} 个条目的数值: ")
  33. index = input(f"请输入第 {i + 1} 个条目的指数: ")
  34. user_data["数值"].append(value)
  35. user_data["指数"].append(index)
  36. # 创建新的DataFrame用于用户输入的数据
  37. new_data = pd.DataFrame({
  38. '日期': pd.date_range(start='2024-12-19', periods=num_entries, freq='D').date, # 仅保留日期部分
  39. '品种': ['脐橙'] * num_entries,
  40. '数值': user_data["数值"],
  41. '指数': user_data["指数"],
  42. })
  43. # 将新的数据插入到DataFrame的顶部
  44. df = pd.concat([new_data, df], ignore_index=True)
  45. # 保存更新后的DataFrame到CSV文件
  46. df.to_csv(full_name, index=False, encoding='utf-8')
  47. print("CSV文件已更新。")
  48. df['日期'] = pd.to_datetime(df['日期'])
  49. df.set_index('日期', inplace=True)
  50. print(df.head(1))
  51. # 只选择脐橙的数据,并按日期升序排序
  52. orange_df = df[df['品种'] == '脐橙'].sort_index(ascending=True)
  53. # 初始化变量
  54. dataX = [] # 属性
  55. dataY = [] # 标签
  56. history_days = 200
  57. future_days = 18
  58. # 创建一个滑动窗口来获取历史数据和未来数据
  59. for i in range(len(orange_df) - history_days - future_days + 1):
  60. tempX = orange_df[['数值', '指数']].iloc[i:(i + history_days)].values.tolist()
  61. tempY = orange_df[['数值', '指数']].iloc[(i + history_days):(i + history_days + future_days)].values.tolist()
  62. dataX.append(tempX)
  63. dataY.append(tempY)
  64. # 转换为numpy数组
  65. dataX = np.array(dataX)
  66. dataY = np.array(dataY)
  67. print("dataX shape:", dataX.shape)
  68. print("dataY shape:", dataY.shape)
  69. # 数据标准化
  70. scaler_X = MinMaxScaler(feature_range=(0, 1))
  71. scaler_Y = MinMaxScaler(feature_range=(0, 1))
  72. dataX_scaled = scaler_X.fit_transform(dataX.reshape(-1, 2)).reshape(dataX.shape)
  73. dataY_scaled = scaler_Y.fit_transform(dataY.reshape(-1, 2)).reshape(dataY.shape)
  74. # 转换为Tensor
  75. dataX_tensor = torch.tensor(dataX_scaled, dtype=torch.float32)
  76. dataY_tensor = torch.tensor(dataY_scaled, dtype=torch.float32).view(dataY.shape[0], -1) # 将标签转换为适合模型输出的形状
  77. # 创建数据集和加载器
  78. dataset = TensorDataset(dataX_tensor, dataY_tensor)
  79. train_size = int(0.8 * len(dataset))
  80. test_size = len(dataset) - train_size
  81. train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
  82. train_loader = DataLoader(
  83. dataset=train_dataset,
  84. batch_size=64,
  85. shuffle=True
  86. )
  87. test_loader = DataLoader(
  88. dataset=test_dataset,
  89. batch_size=64,
  90. shuffle=False
  91. )
  92. # 定义模型(例如使用LSTM)
  93. class LSTMModel(nn.Module):
  94. def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
  95. super(LSTMModel, self).__init__()
  96. self.hidden_dim = hidden_dim
  97. self.layer_dim = layer_dim
  98. self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
  99. self.fc = nn.Linear(hidden_dim, output_dim)
  100. def forward(self, x):
  101. h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)
  102. c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)
  103. out, _ = self.lstm(x, (h0, c0))
  104. out = self.fc(out[:, -1, :])
  105. return out
  106. input_dim = 2
  107. hidden_dim = 50
  108. layer_dim = 2
  109. output_dim = 2 * future_days # 因为我们预测的是18天的两个特征
  110. model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
  111. criterion = nn.MSELoss()
  112. optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
  113. # 初始化损失列表
  114. lossList = []
  115. lossListTest = []
  116. num_epochs = 100
  117. device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  118. model.to(device)
  119. for epoch in range(num_epochs):
  120. model.train()
  121. running_loss = 0.0
  122. # 训练模型
  123. for i, (inputs, labels) in enumerate(train_loader):
  124. inputs, labels = inputs.to(device), labels.to(device)
  125. # 前向传播
  126. outputs = model(inputs)
  127. loss = criterion(outputs, labels)
  128. # 反向传播和优化
  129. optimizer.zero_grad()
  130. loss.backward()
  131. optimizer.step()
  132. running_loss += loss.item()
  133. avg_train_loss = running_loss / len(train_loader)
  134. lossList.append(avg_train_loss)
  135. # 测试模型
  136. model.eval()
  137. test_loss = 0.0
  138. with torch.no_grad():
  139. predictions, actuals = [], []
  140. for inputs, labels in test_loader:
  141. inputs, labels = inputs.to(device), labels.to(device)
  142. outputs = model(inputs)
  143. loss = criterion(outputs, labels)
  144. test_loss += loss.item()
  145. predictions.append(outputs.cpu().numpy())
  146. actuals.append(labels.cpu().numpy())
  147. avg_test_loss = test_loss / len(test_loader)
  148. lossListTest.append(avg_test_loss) # 在每个epoch后添加测试损失
  149. print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}')
  150. # 绘制损失下降图
  151. # plt.figure(figsize=(10, 6))
  152. # plt.plot(list(range(1, num_epochs + 1)), lossList, label='Train')
  153. # plt.plot(list(range(1, num_epochs + 1)), lossListTest, label='Test')
  154. # plt.legend()
  155. # plt.xlabel('epoch')
  156. # plt.ylabel('loss')
  157. # plt.title('Loss over epochs')
  158. # plt.show()
  159. latest_data = orange_df[['数值', '指数']].iloc[-history_days:].values
  160. # 标准化最新数据
  161. latest_data_scaled = scaler_X.transform(latest_data)
  162. # 转换为张量并添加批次维度
  163. latest_data_tensor = torch.tensor(latest_data_scaled, dtype=torch.float32).unsqueeze(0).to(device)
  164. # 模型推理
  165. model.eval()
  166. with torch.no_grad():
  167. future_predictions = model(latest_data_tensor)
  168. # 反标准化预测值
  169. future_predictions_unscaled = scaler_Y.inverse_transform(future_predictions.cpu().numpy().reshape(-1, 2))
  170. print("预测的未来18天的数值 (价格) 和指数:")
  171. for i in range(future_days):
  172. print(f"Day {i+1}: 数值 (价格) = {future_predictions_unscaled[i, 0]:.2f}, 指数 = {future_predictions_unscaled[i, 1]:.2f}")
  173. # plt.figure(figsize=(14, 7))
  174. # days = list(range(1, future_days + 1))
  175. # plt.plot(days, future_predictions_unscaled[:, 0], label='预测值 (数值)', marker='o')
  176. # plt.plot(days, future_predictions_unscaled[:, 1], label='预测值 (指数)', marker='x')
  177. # plt.title('预测的未来18天的数值 (价格) 和指数')
  178. # plt.xlabel('天数')
  179. # plt.ylabel('值')
  180. # plt.legend()
  181. # plt.show()
  182. # # 测试模型
  183. # model.eval()
  184. # with torch.no_grad():
  185. # predictions, actuals = [], []
  186. # for inputs, labels in test_loader:
  187. # inputs, labels = inputs.to(device), labels.to(device).view(labels.size(0), -1)
  188. # outputs = model(inputs)
  189. # predictions.append(outputs.cpu().numpy())
  190. # actuals.append(labels.cpu().numpy())
  191. #
  192. # predictions = np.concatenate(predictions)
  193. # actuals = np.concatenate(actuals)
  194. #
  195. # # 反标准化
  196. # predictions_unscaled = scaler_Y.inverse_transform(predictions.reshape(-1, 2))
  197. # actuals_unscaled = scaler_Y.inverse_transform(actuals.reshape(-1, 2))
  198. # # 可视化结果
  199. # plt.figure(figsize=(14, 18))
  200. # plt.plot(actuals_unscaled[:, 0], label='真实值 (数值)')
  201. # plt.plot(predictions_unscaled[:, 0], label='预测值 (数值)', linestyle='--')
  202. # plt.plot(actuals_unscaled[:, 1], label='真实值 (指数)')
  203. # plt.plot(predictions_unscaled[:, 1], label='预测值 (指数)', linestyle='--')
  204. # plt.title('脐橙 数值 和 指数 预测 vs 真实值')
  205. # plt.xlabel('样本编号')
  206. # plt.ylabel('值')
  207. # plt.legend()
  208. # plt.show()