Commit 57c6c538 authored by Benjamin Murauer

almost there! the first value of the fc_configs is pre-determined by the last config of the conv-layers, but not sure how.
parent 02f7483e
Pipeline #47923 failed
@@ -5,7 +5,7 @@ from dstoolbox.transformers import Padder2d, TextFeaturizer
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.pipeline import make_pipeline
 from skorch import NeuralNetClassifier
-from tuhlbox.skorch_wrapper import TorchClassifier
+from tuhlbox.torch_classifier import TorchClassifier
 from tuhlbox.torch_cnn import CharCNN

 x, y = fetch_20newsgroups(return_X_y=True)
@@ -19,7 +19,7 @@ pipe = make_pipeline(
     Padder2d(pad_value=VOCAB_SIZE, max_len=MAX_SEQ_LEN, dtype=int),
     TorchClassifier(
         module=CharCNN,
-        device="cuda",
+        device="cpu",
         batch_size=54,
         max_epochs=5,
         learn_rate=0.01,
@@ -28,6 +28,13 @@ pipe = make_pipeline(
         module__embedding_dim=EMB_DIM,
         module__vocab_size=VOCAB_SIZE,
         module__max_seq_length=MAX_SEQ_LEN,
+        module__conv_layer_configurations=[
+            (0, 54, 7, 1, 3, 3),
+            (54, 50, 5, 1, 10, 1),
+        ],
+        # the first value in this fc config is somehow related to the stride
+        # of the maxpool; unsure of this (see the sketch after this hunk)
+        module__fc_layer_configurations=[350, 256, 128],
         ),
     ),
 )
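The 350 above is not arbitrary: the first fc entry must equal the flattened output of the conv stack, i.e. the last layer's out_channels times the sequence length that survives every Conv1d and MaxPool1d. A minimal sketch of that arithmetic, assuming no padding and dilation 1 (which matches the Conv1d/MaxPool1d calls in torch_cnn.py); flattened_conv_size is a hypothetical helper, not part of tuhlbox:

    def flattened_conv_size(max_seq_length, conv_layer_configurations):
        # Each config is (in_channels, out_channels, kernel, stride,
        # pool_kernel, pool_stride). With padding=0 and dilation=1,
        # PyTorch's length formula is L_out = (L_in - kernel) // stride + 1.
        length = max_seq_length
        out_channels = 0
        for _, out_channels, kernel, stride, pool_k, pool_s in conv_layer_configurations:
            length = (length - kernel) // stride + 1  # Conv1d
            length = (length - pool_k) // pool_s + 1  # MaxPool1d
        # After x.view(batch, -1) the feature size is channels * length.
        return out_channels * length

For the two configurations above this yields 50 * length, so 350 corresponds to a remaining length of 7. The value therefore depends on MAX_SEQ_LEN together with all kernel sizes and strides, not on the maxpool stride alone.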
@@ -4,7 +4,7 @@ import torch
 from dstoolbox.transformers import Padder2d, TextFeaturizer
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.pipeline import make_pipeline
-from tuhlbox.skorch_wrapper import TorchClassifier
+from tuhlbox.torch_classifier import TorchClassifier
 from tuhlbox.torch_lstm import RNNClassifier

 x, y = fetch_20newsgroups(return_X_y=True)
File moved: tuhlbox/skorch_wrapper.py → tuhlbox/torch_classifier.py
"""Basic CNN model."""
from typing import List, Tuple
import torch
import torch.nn as nn
+
+
+def _generate_conv_layers(
+    embedding_dim: int,
+    conv_layer_configurations: List[Tuple[int, int, int, int, int, int]],
+) -> List[nn.Module]:
+    """Build one Conv1d/ReLU/MaxPool1d block per configuration tuple."""
+    result: List[nn.Module] = []
+    for i, layer in enumerate(conv_layer_configurations):
+        # The first block reads from the embedding, so its configured
+        # in_channels (layer[0]) is ignored in favour of embedding_dim.
+        input_size = layer[0] if i > 0 else embedding_dim
+        result.append(
+            nn.Sequential(
+                nn.Conv1d(
+                    in_channels=input_size,
+                    out_channels=layer[1],
+                    kernel_size=(layer[2],),
+                    stride=(layer[3],),
+                ),
+                nn.ReLU(),
+                nn.MaxPool1d(kernel_size=layer[4], stride=layer[5]),
+            )
+        )
+    return result
+
+
+def _generate_fc_layers(
+    n_classes: int, dropout: float, fc_layer_configurations: List[int]
+) -> List[nn.Module]:
+    """Build Linear/ReLU/Dropout blocks plus the final classification layer."""
+    result: List[nn.Module] = []
+    # Consecutive size pairs become one block each, e.g. [350, 256, 128]
+    # yields Linear(350, 256) and Linear(256, 128).
+    for input_size, output_size in zip(
+        fc_layer_configurations, fc_layer_configurations[1:]
+    ):
+        result.append(
+            nn.Sequential(
+                nn.Linear(input_size, output_size),
+                nn.ReLU(),
+                nn.Dropout(p=dropout),
+            )
+        )
+    # Project the last hidden size onto the class scores.
+    result.append(nn.Linear(fc_layer_configurations[-1], n_classes))
+    result.append(nn.LogSoftmax(dim=-1))
+    return result
+
+
 class CharCNN(nn.Module):
     """Basic CNN model that can be built with variable amounts of layers etc."""
@@ -12,6 +59,8 @@ class CharCNN(nn.Module):
         embedding_dim: int,
         n_classes: int,
         max_seq_length: int,
+        conv_layer_configurations: List[Tuple[int, int, int, int, int, int]],
+        fc_layer_configurations: List[int],
         dropout: float = 0.0,
     ):
         """
@@ -20,7 +69,7 @@ class CharCNN(nn.Module):
         Args:
             vocab_size: Size of the vocabulary
             embedding_dim: Size of embedding vectors
-            num_classes: Number of classes used
+            n_classes: Number of classes used
             max_seq_length: Max. sequence length for each token
             dropout: random dropout fraction
         """
@@ -29,87 +78,29 @@ class CharCNN(nn.Module):
         self.vocab_size = vocab_size
         self.dropout = dropout
         self.max_seq_length = max_seq_length
-        self.num_classes = n_classes
-
-        # Embedding Input dimensions (x, y):
-        # x: batch size
-        # y: max. seq. length
+        self.n_classes = n_classes
+        self.conv_layer_configurations = conv_layer_configurations
+        self.fc_layer_configurations = fc_layer_configurations
+
         self.emb = nn.Embedding(
             num_embeddings=self.vocab_size + 1,
             embedding_dim=self.embedding_dim,
         )
-
-        # Embedding Output dimensions (x, y, z):
-        # x: batch size
-        # y: max. seq. length
-        # z: embedding_dim
-
-        # Conv1D dimensions (x, y, z):
-        # x = batch size
-        # y = number of channels
-        # z = number of features
-        self.conv1 = nn.Sequential(
-            nn.Conv1d(
-                in_channels=self.embedding_dim,
-                out_channels=50,
-                kernel_size=(7,),
-                stride=(1,),
-            ),
-            nn.ReLU(),
-            nn.MaxPool1d(kernel_size=3, stride=3),
+        # nn.ModuleList registers the generated blocks as submodules, so
+        # their parameters are trained and moved to the right device.
+        self.conv_layers = nn.ModuleList(
+            _generate_conv_layers(self.embedding_dim, self.conv_layer_configurations)
         )
-        self.conv2 = nn.Sequential(
-            nn.Conv1d(
-                in_channels=50,
-                out_channels=50,
-                kernel_size=(5,),
-                stride=(1,),
-            ),
-            nn.ReLU(),
-            nn.MaxPool1d(kernel_size=3, stride=3),
+        self.fc_layers = nn.ModuleList(
+            _generate_fc_layers(self.n_classes, self.dropout, self.fc_layer_configurations)
         )
-        self.fc1 = nn.Sequential(nn.Linear(450, 256), nn.ReLU(), nn.Dropout(p=dropout))
-        self.fc2 = nn.Sequential(nn.Linear(256, 128), nn.ReLU(), nn.Dropout(p=dropout))
-        self.fc3 = nn.Linear(128, self.num_classes)
-        self.log_softmax = nn.LogSoftmax(dim=1)
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self.emb(x)
-
-        # Embedding Output dimensions (x, y, z):
-        # x: batch size
-        # y: max. seq. length
-        # z: embedding_dim
-
-        # Conv1D dimensions (x, y, z):
-        # x = batch size
-        # y = number of channels
-        # z = number of features
-        x = x.permute(0, 2, 1)
-        x = self.conv1(x)
-        x = self.conv2(x)
-        # collapse
+        # Embed, then move channels in front of the sequence axis for Conv1d:
+        # (batch, seq_len) -> (batch, embedding_dim, seq_len).
+        x = self.emb(x).permute(0, 2, 1)
+        for conv in self.conv_layers:
+            x = conv(x)
+        # Collapse (batch, channels, length) into (batch, channels * length).
         x = x.view(x.size(0), -1)
-        # linear layer
-        x = self.fc1(x)
-        # linear layer
-        x = self.fc2(x)
-        # linear layer
-        x = self.fc3(x)
-        # output layer
-        x = self.log_softmax(x)
+        for fc in self.fc_layers:
+            x = fc(x)
         return x
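On the commit message's open question: the first fc_configs entry is fully determined by the conv stack, and a common way to avoid computing it by hand is to infer it once at construction time by pushing a dummy batch through the embedding and conv blocks. A hedged sketch, not part of this commit; it assumes it runs at the end of CharCNN.__init__, after self.emb and self.conv_layers exist:

        # Hypothetical addition to CharCNN.__init__ (not in this commit):
        # infer the flattened conv output size instead of passing it by hand.
        with torch.no_grad():
            dummy = torch.zeros(1, self.max_seq_length, dtype=torch.long)
            out = self.emb(dummy).permute(0, 2, 1)
            for conv in self.conv_layers:
                out = conv(out)
            inferred_fc_input = out.view(1, -1).size(1)  # channels * length

With this in place, fc_layer_configurations could start at inferred_fc_input automatically, and the example's hard-coded 350 would no longer have to be matched to MAX_SEQ_LEN by trial and error.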