I
I
Ilya Neizvestnyj2019-10-09 11:47:21
Python
Ilya Neizvestnyj, 2019-10-09 11:47:21

How to predict the weather for the next 24 hours?

How can I predict the weather for the next 24 hours from the previous 10 days of data? For example, we take the first 1440 samples (10 days of 10-minute readings) and predict from them. The code and links are below.
Dataset - https://www.kaggle.com/pankrzysiu/weather-archive-...
Network:

import os
import numpy as np
from matplotlib import pyplot as plt
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop

data_dir = "C:/Users/pikro/Downloads/jena_climate"
fname = os.path.join(data_dir, "jena_climate_2009_2016.csv")

# Read the whole CSV up front; the context manager closes the file even if
# reading raises.
with open(fname) as f:
    data = f.read()

lines = data.split("\n")
header = lines[0].split(",")
# Skip blank data lines (such as the empty string left behind by a trailing
# newline): they would produce an empty value list and break the row
# assignment in the parsing loop.
lines = [line for line in lines[1:] if line.strip()]

print(header)
print(len(lines))

# Parse every row into floats, dropping the first CSV column (it is not
# converted to a number).
float_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(",")[1:]]
    # Assign row i only. Slicing with `i:` would broadcast the values over
    # every remaining row on each iteration, which is quadratic work.
    float_data[i, :] = values

# Column 1 is the series this script plots and later predicts (`temp`).
temp = float_data[:, 1]
plt.plot(range(len(temp)), temp)
plt.show()
# Zoom in on the first 1440 samples.
plt.plot(range(1440), temp[:1440])
plt.show()

# Normalize every column using statistics of the first 200000 rows only (the
# range later used for training). `mean` and `std` are kept so that a
# normalized value can be mapped back with `value * std[k] + mean[k]`.
mean = float_data[:200000].mean(axis=0)
float_data -= mean
std = float_data[:200000].std(axis=0)
float_data /= std

print(float_data[0])


def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6):
    """Yield endless batches of (samples, targets) drawn from a 2D array.

    Args:
        data: 2D array indexed as ``data[row][column]``.
        lookback: Number of rows before the anchor row that a sample spans.
        delay: Offset, in rows, from the anchor row to the target row.
        min_index: Lower bound of the slice of ``data`` to draw from.
        max_index: Exclusive upper bound for anchor rows; ``None`` means
            ``len(data) - delay - 1``.
        shuffle: If true, anchor rows are drawn at random (with replacement);
            otherwise they are walked in order, restarting from the beginning
            when a full batch would reach ``max_index``.
        batch_size: Number of anchor rows per batch.
        step: Stride, in rows, used when sampling the lookback window.

    Yields:
        ``(samples, targets)`` where ``samples`` has shape
        ``(batch, lookback // step, data.shape[-1])`` and ``targets`` holds
        column 1 of the row ``delay`` rows after each anchor row.
    """
    upper = len(data) - delay - 1 if max_index is None else max_index
    first_row = min_index + lookback
    cursor = first_row

    while True:
        if shuffle:
            batch_rows = np.random.randint(first_row, upper, size=batch_size)
        else:
            # Restart from the beginning when the next batch would reach the
            # upper bound.
            if cursor + batch_size >= upper:
                cursor = first_row
            batch_rows = np.arange(cursor, min(cursor + batch_size, upper))
            cursor += len(batch_rows)

        n_rows = len(batch_rows)
        samples = np.zeros((n_rows, lookback // step, data.shape[-1]))
        targets = np.zeros((n_rows,))

        for slot, anchor in enumerate(batch_rows):
            window = range(anchor - lookback, anchor, step)
            samples[slot] = data[window]
            targets[slot] = data[anchor + delay][1]
        yield samples, targets


# Preparing the generators.
# The prose accompanying this script equates 1440 samples with 10 days, i.e.
# 144 samples per day (one reading every 10 minutes).
lookback = 1440  # 10 days of history per sample
step = 6  # keep every 6th reading: one data point per hour
delay = 144  # the target lies 144 readings (24 hours) after the window
batch_size = 128

# Training draws random windows from rows [0, 200000).
train_gen = generator(float_data,
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=200000,
                      shuffle=True,
                      step=step,
                      batch_size=batch_size)

# Validation walks its range in order (no shuffling) so that `val_steps`
# batches cover the range once and every epoch is scored on the same samples.
val_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=200001,
                    max_index=300001,
                    shuffle=False,
                    step=step,
                    batch_size=batch_size)

# The test split is everything from row 300001 on, also read in order.
test_gen = generator(float_data, lookback=lookback, delay=delay, min_index=300001, max_index=None, step=step,
                     batch_size=batch_size)

# Number of batches to draw from each generator to see its range once.
val_steps = (300000 - 200001 - lookback) // batch_size
print(val_steps)
test_steps = (len(float_data) - 300001 - lookback) // batch_size
print(test_steps)


def evaluate_naive_method():
    """Return the mean absolute error of a "nothing changes" baseline.

    For each of ``val_steps`` batches pulled from ``val_gen``, the prediction
    is column 1 of the last timestep of every sample; the per-batch MAE
    against the targets is computed and the batch MAEs are averaged.
    """

    def _next_batch_mae():
        # Pull one validation batch and score the last-value prediction.
        window, expected = next(val_gen)
        last_seen = window[:, -1, 1]
        return np.mean(np.abs(last_seen - expected))

    return np.mean([_next_batch_mae() for _ in range(val_steps)])

# Single recurrent layer with dropout on inputs and on the recurrent state,
# followed by one linear unit that outputs the (normalized) prediction.
# `None` in the input shape leaves the number of timesteps unspecified.
model = Sequential()
model.add(layers.GRU(32, dropout=0.2, recurrent_dropout=0.2, input_shape=(None, float_data.shape[1])))
model.add(layers.Dense(1))

# MAE loss, so the reported numbers are in normalized units of the target.
model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen,
                              steps_per_epoch=500,
                              epochs=30,
                              validation_data=val_gen,
                              validation_steps=val_steps)

model.save("weather.h5")

loss = history.history['loss']
val_loss = history.history["val_loss"]

# Derive the x axis from the recorded history so it always matches the number
# of epochs that actually ran.
epochs = range(1, len(loss) + 1)
plt.figure()

plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
# Both curves are losses (MAE), so the title says so.
plt.title("Training and validation loss")
plt.legend()

plt.show()

# Predictions are in normalized units; map them back with
# `value * std[1] + mean[1]` before comparing with raw readings.
predict = model.predict_generator(test_gen, steps=test_steps)

I tried to do it like this:
# `models.load_model` is used below, so `models` has to be imported.
from keras import models

test_gen = generator(float_data, lookback=lookback, delay=delay, min_index=300001, max_index=None, step=step,
                     batch_size=batch_size)

val_steps = (300000 - 200001 - lookback) // batch_size
# The test generator only covers rows from index 300001 on, so that offset
# must be subtracted; otherwise the generator wraps around and the same
# predictions are emitted several times.
test_steps = (len(float_data) - 300001 - lookback) // batch_size

model = models.load_model("weather.h5")

predict = model.predict_generator(test_gen, test_steps)
print("____")
# The data was normalized as (x - mean) / std, so the inverse is
# x * std + mean. Multiplying by std alone leaves every value shifted by the
# mean of column 1.
for i in predict:
    for j in i:
        print(j, j * std[1] + mean[1])
print(len(predict))

But the numbers do not match (the values are completely different). Maybe I'm normalizing it wrong?

Answer the question

In order to leave comments, you need to log in

2 answer(s)
A
Arseny Kravchenko, 2019-10-09
@Arseny_Info

Look at the Yandex reports, they talked about it quite a lot

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question