Commit 94d5251a authored by Benjamin Murauer

added calculation of first fc layer size

parent 57c6c538
Pipeline #47951 failed
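
In effect: where the fully connected layers previously had to be configured with a hand-computed input size, e.g.

    # the first value in this fc config is somehow related to the stride of
    # the maxpool, unsure of this.
    module__fc_layer_configurations=[350, 256, 128]

the first input size is now derived from the convolution layer configs, so the tests below can pass FcLayerConfig(None, 256) instead.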
"""Test basic skorch models with CNN network."""
import torch
from dstoolbox.transformers import Padder2d, TextFeaturizer
from sklearn.datasets import fetch_20newsgroups
from sklearn.pipeline import make_pipeline
from torch import nn
from tuhlbox.torch_classifier import TorchClassifier
from tuhlbox.torch_cnn import CharCNN, ConvLayerConfig, FcLayerConfig
from tuhlbox.torch_lstm import RNNClassifier
x, y = fetch_20newsgroups(return_X_y=True)
VOCAB_SIZE = 1000
EMB_DIM = 300
MAX_SEQ_LEN = 100
def test_cnn() -> None:
pipe = make_pipeline(
TextFeaturizer(max_features=VOCAB_SIZE),
Padder2d(pad_value=VOCAB_SIZE, max_len=MAX_SEQ_LEN, dtype=int),
TorchClassifier(
module=CharCNN,
max_seq_len=MAX_SEQ_LEN,
device="cpu",
batch_size=54,
max_epochs=5,
learn_rate=0.01,
optimizer=torch.optim.Adam,
model_kwargs=dict(
module__emb_layer=nn.Embedding(VOCAB_SIZE + 1, EMB_DIM),
module__conv_layer_configs=[
ConvLayerConfig(EMB_DIM, 50, 7, 1, 3, 3),
ConvLayerConfig(50, 50, 5, 1, 3, 3),
],
module__fc_layer_configs=[
FcLayerConfig(None, 256), # will be calculated automagically
FcLayerConfig(256, 128),
FcLayerConfig(128, 64),
],
),
),
)
pipe.fit(x, y)
def test_lstm() -> None:
pipe = make_pipeline(
TextFeaturizer(max_features=VOCAB_SIZE),
Padder2d(pad_value=VOCAB_SIZE, max_len=MAX_SEQ_LEN, dtype=int),
TorchClassifier(
module=RNNClassifier,
device="cuda",
batch_size=54,
max_epochs=5,
learn_rate=0.01,
optimizer=torch.optim.Adam,
),
)
pipe.fit(x, y)
"""Test basic skorch models with CNN network."""
import torch
from dstoolbox.transformers import Padder2d, TextFeaturizer
from sklearn.datasets import fetch_20newsgroups
from sklearn.pipeline import make_pipeline
from skorch import NeuralNetClassifier
from tuhlbox.torch_classifier import TorchClassifier
from tuhlbox.torch_cnn import CharCNN
x, y = fetch_20newsgroups(return_X_y=True)
VOCAB_SIZE = 1000
EMB_DIM = 300
MAX_SEQ_LEN = 100
pipe = make_pipeline(
TextFeaturizer(max_features=VOCAB_SIZE),
Padder2d(pad_value=VOCAB_SIZE, max_len=MAX_SEQ_LEN, dtype=int),
TorchClassifier(
module=CharCNN,
device="cpu",
batch_size=54,
max_epochs=5,
learn_rate=0.01,
optimizer=torch.optim.Adam,
model_kwargs=dict(
module__embedding_dim=EMB_DIM,
module__vocab_size=VOCAB_SIZE,
module__max_seq_length=MAX_SEQ_LEN,
module__conv_layer_configurations=[
(0, 54, 7, 1, 3, 3),
(54, 50, 5, 1, 10, 1),
],
# the first value in this fc config is somehow related to the stride of
# the maxpool, unsure of this.
module__fc_layer_configurations=[350, 256, 128],
),
),
)
pipe.fit(x, y)
"""Test basic skorch models with CNN network."""
import torch
from dstoolbox.transformers import Padder2d, TextFeaturizer
from sklearn.datasets import fetch_20newsgroups
from sklearn.pipeline import make_pipeline
from tuhlbox.torch_classifier import TorchClassifier
from tuhlbox.torch_lstm import RNNClassifier
x, y = fetch_20newsgroups(return_X_y=True)
VOCAB_SIZE = 1000
EMB_DIM = 300
MAX_SEQ_LEN = 100
pipe = make_pipeline(
TextFeaturizer(max_features=VOCAB_SIZE),
Padder2d(pad_value=VOCAB_SIZE, max_len=MAX_SEQ_LEN, dtype=int),
TorchClassifier(
module=RNNClassifier,
device="cuda",
batch_size=54,
max_epochs=5,
learn_rate=0.01,
optimizer=torch.optim.Adam,
),
)
pipe.fit(x, y)

@@ -22,6 +22,7 @@ class TorchClassifier(ClassifierMixin, BaseEstimator):
    def __init__(
        self,
        module: Type[nn.Module],
        max_seq_len: Optional[int] = None,
        batch_size: int = 64,
        max_epochs: int = 5,
        learn_rate: float = 1e-3,
@@ -38,12 +39,15 @@ class TorchClassifier(ClassifierMixin, BaseEstimator):
        self.wrapped_model: Optional[NeuralNetClassifier] = None
        self.optimizer = optimizer
        self.label_encoder: LabelEncoder = LabelEncoder()
        self.max_seq_len = max_seq_len

    def fit(self, x: Any, y: Iterable[Any], **fit_kwargs: Any) -> TorchClassifier:
        if self.wrapped_model is None:
            classes = set(y)
            n_classes = len(classes)
            self.model_kwargs["module__n_classes"] = n_classes
            if self.max_seq_len is not None:
                self.model_kwargs["module__max_seq_len"] = self.max_seq_len
            self.wrapped_model = NeuralNetClassifier(
                module=self.module,
                device=self.device,
......
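
# Note on the module__ prefix (skorch convention): keyword arguments passed to
# NeuralNetClassifier with a module__ prefix are forwarded to the module's
# constructor when the network is initialized on fit(). For example (sketch,
# not part of this diff):
#
#   net = NeuralNetClassifier(module=CharCNN, module__n_classes=20, ...)
#   # on fit(), skorch instantiates CharCNN(n_classes=20, ...)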
"""Basic CNN model."""
import math
from collections import namedtuple
from typing import List, Tuple
import torch
import torch.nn as nn
ConvLayerConfig = namedtuple(
"ConvLayerConfig",
[
"in_channels",
"out_channels",
"conv_kernel_size",
"conv_stride",
"max_kernel_size",
"max_stride",
],
)
def _generate_conv_layers(
embedding_dim: int,
conv_layer_configurations: List[Tuple[int, int, int, int, int, int]],
) -> List[nn.Module]:
result: List[nn.Module] = []
for i, layer in enumerate(conv_layer_configurations):
input_size = layer[0] if i > 0 else embedding_dim
FcLayerConfig = namedtuple("FcLayerConfig", ["in_features", "out_features"])


def create_conv_layers(configs: List[ConvLayerConfig]) -> List[nn.Sequential]:
    result = []
    for config in configs:
        result.append(
            nn.Sequential(
                nn.Conv1d(
                    in_channels=config.in_channels,
                    out_channels=config.out_channels,
                    kernel_size=(config.conv_kernel_size,),
                    stride=(config.conv_stride,),
                ),
                nn.ReLU(),
                nn.MaxPool1d(
                    kernel_size=(config.max_kernel_size,),
                    stride=(config.max_stride,),
                ),
            ),
        )
    return result


def create_fc_layers(
    fc_configs: List[FcLayerConfig], conv_configs: List[ConvLayerConfig], start_n: int
) -> List[nn.Sequential]:
    result = []
    # the input size of the first fc layer depends on the output of the conv
    # stack and is therefore computed rather than taken from the config
    start_n = compute_first_fc_layer_input_size(conv_configs, start_n)
    for i, config in enumerate(fc_configs):
        in_features = start_n if i == 0 else config.in_features
        result.append(
            nn.Sequential(
                nn.Linear(in_features=in_features, out_features=config.out_features)
            )
        )
    return result


def compute_first_fc_layer_input_size(
    conv_configs: List[ConvLayerConfig], n: int
) -> int:
    """
    Calculate the input dimension of the first fully connected layer.

    See https://datascience.stackexchange.com/a/40991/9281

    Args:
        conv_configs: Configurations of the convolution layers
        n: starting value (max sequence length)

    Returns:
        the dimension of the first fc layer
    """

    def get_output_dim(in_size: int, kernel: int, stride: int) -> int:
        return math.floor(((in_size - kernel) / stride) + 1)

    for config in conv_configs:
        n = get_output_dim(n, config.conv_kernel_size, config.conv_stride)
        n = get_output_dim(n, config.max_kernel_size, config.max_stride)
    last_conv_out_channels = conv_configs[-1].out_channels
    return n * last_conv_out_channels
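
# Worked example (using the values from the CNN test above; arithmetic added
# for illustration): with n = MAX_SEQ_LEN = 100 and the configs
# ConvLayerConfig(300, 50, 7, 1, 3, 3) and ConvLayerConfig(50, 50, 5, 1, 3, 3):
#
#   after conv1:    (100 - 7) / 1 + 1 = 94
#   after maxpool1: floor((94 - 3) / 3) + 1 = 31
#   after conv2:    (31 - 5) / 1 + 1 = 27
#   after maxpool2: floor((27 - 3) / 3) + 1 = 9
#
# The last conv layer has 50 output channels, so the first fc layer receives
# 9 * 50 = 450 input features.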


class CharCNN(nn.Module):
    """Basic CNN model that can be built with variable amounts of layers etc."""

    def __init__(
        self,
        n_classes: int,
        max_seq_len: int,
        emb_layer: nn.Embedding,
        conv_layer_configs: List[ConvLayerConfig],
        fc_layer_configs: List[FcLayerConfig],
    ):
        """
        Create a new CNN model.

        Args:
            n_classes: Number of classes used
            max_seq_len: Maximum length of the (padded) input sequences
            emb_layer: Embedding layer mapping token ids to dense vectors
            conv_layer_configs: Configurations of the convolution layers
            fc_layer_configs: Configurations of the fully connected layers
        """
        super().__init__()
        self.emb_layer = emb_layer
        # nn.ModuleList (rather than plain lists) is required so that the
        # layers' parameters are registered with the module and picked up by
        # the optimizer
        self.conv_layers = nn.ModuleList(create_conv_layers(conv_layer_configs))
        self.fc_layers = nn.ModuleList(
            create_fc_layers(fc_layer_configs, conv_layer_configs, max_seq_len)
        )
        self.last_layer = nn.Sequential(
            nn.Linear(fc_layer_configs[-1].out_features, n_classes),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # the embedding layer returns the values in a different order than is
        # required by the convolution layers, so we have to swap them
        x = self.emb_layer(x).permute(0, 2, 1)
        for conv in self.conv_layers:
            x = conv(x)
        # flatten all values
        x = x.view(x.size(0), -1)
        for fc in self.fc_layers:
            x = fc(x)
        return self.last_layer(x)

    @staticmethod
    def conv_layer(
        in_channels: int,
        out_channels: int,
        conv_kernel: int,
        conv_stride: int = 1,
        max_kernel: int = 3,
        max_stride: int = 3,
    ) -> nn.Module:
        return nn.Sequential(
            nn.Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=(conv_kernel,),
                stride=(conv_stride,),
            ),
            nn.ReLU(),
            nn.MaxPool1d(max_kernel, max_stride),
        )

    @staticmethod
    def fc_layer(
        in_size: int,
        out_size: int,
        dropout: float = 0.0,
    ) -> nn.Module:
        return nn.Sequential(
            nn.Linear(in_features=in_size, out_features=out_size),
            nn.ReLU(),
            nn.Dropout(p=dropout),
        )
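
# A minimal usage sketch (illustrative only, not part of the commit; values
# taken from the tests above, with n_classes=20 for the 20newsgroups data):
#
#   model = CharCNN(
#       n_classes=20,
#       max_seq_len=100,
#       emb_layer=nn.Embedding(1001, 300),  # VOCAB_SIZE + 1, EMB_DIM
#       conv_layer_configs=[
#           ConvLayerConfig(300, 50, 7, 1, 3, 3),
#           ConvLayerConfig(50, 50, 5, 1, 3, 3),
#       ],
#       fc_layer_configs=[FcLayerConfig(None, 256), FcLayerConfig(256, 128)],
#   )
#   log_probs = model(torch.randint(0, 1001, (2, 100)))  # shape: (2, 20)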