2 changes: 1 addition & 1 deletion thinc/api.py
@@ -25,7 +25,7 @@
from .layers import Dropout, Embed, expand_window, HashEmbed, LayerNorm, Linear
from .layers import Maxout, Mish, MultiSoftmax, Relu, softmax_activation, Softmax, LSTM
from .layers import CauchySimilarity, ParametricAttention, Logistic
from .layers import resizable, sigmoid_activation, Sigmoid, SparseLinear
from .layers import resizable, sigmoid_activation, Sigmoid, SparseLinear, SparseLinear_v2
from .layers import ClippedLinear, ReluK, HardTanh, HardSigmoid
from .layers import Dish, HardSwish, HardSwishMobilenet, Swish, Gelu
from .layers import PyTorchWrapper, PyTorchRNNWrapper, PyTorchLSTM
2 changes: 1 addition & 1 deletion thinc/layers/__init__.py
Expand Up @@ -26,7 +26,7 @@
from .sigmoid import Sigmoid
from .softmax_activation import softmax_activation
from .softmax import Softmax, Softmax_v2
from .sparselinear import SparseLinear
from .sparselinear import SparseLinear, SparseLinear_v2
from .tensorflowwrapper import TensorFlowWrapper, keras_subclass
from .mxnetwrapper import MXNetWrapper

63 changes: 48 additions & 15 deletions thinc/layers/sparselinear.pyx
@@ -27,6 +27,22 @@ def SparseLinear(nO: Optional[int] = None, length: int = 2 ** 18):
init=init,
params={"W": None, "b": None},
dims={"nO": nO, "length": length},
attrs={"invalid_indexing": True},
)


@cython.binding(True)
@registry.layers("SparseLinear.v2")
def SparseLinear_v2(nO: Optional[int] = None, length: int = 2 ** 18):
# NB: We can't have generic return type annotation if we want function to
# be bound (and inspectable): https://github.com/cython/cython/issues/2753
return Model(
"sparse_linear",
forward,
init=init,
params={"W": None, "b": None},
dims={"nO": nO, "length": length},
attrs={"invalid_indexing": False},
)


@@ -70,11 +86,12 @@ def _begin_cpu_update(model, np.ndarray keys, np.ndarray values, np.ndarray lengths):
cdef np.ndarray W = model.get_param("W")
cdef np.ndarray b = model.get_param("b")
cdef np.ndarray scores = model.ops.alloc((len(lengths), nO))
cdef bint invalid_indexing = model.attrs["invalid_indexing"]
scores += b
set_scoresC(<float*>scores.data,
<uint64_t*>keys.data, <float*>values.data, <int32_t*>lengths.data,
lengths.shape[0], nO,
<float*>W.data, length)
<float*>W.data, length, invalid_indexing)
return scores, _finish_linear_update(model, keys, values, lengths)


@@ -95,10 +112,10 @@ class _finish_linear_update:
cdef np.ndarray keys = self.keys
cdef np.ndarray values = self.values
cdef np.ndarray lengths = self.lengths
cdef bint invalid_indexing = self.model.attrs["invalid_indexing"]
set_gradientC(<float*>d_weights.data,
<uint64_t*>keys.data, <float*>values.data, <int32_t*>lengths.data,
lengths.shape[0], nO,
&d_scores[0,0], length)
lengths.shape[0], nO, &d_scores[0,0], length, invalid_indexing)
cdef int i, j
for i in range(d_scores.shape[0]):
for j in range(d_scores.shape[1]):
@@ -110,41 +127,57 @@

cdef void set_scoresC(float* scores,
const uint64_t* keys, const float* values, const int32_t* lengths,
int batch_size, int nr_out,
const float* weights, int nr_weight) nogil:
int batch_size, int nr_out, const float* weights, int nr_weight,
bint invalid_indexing) nogil:
cdef uint32_t idx1, idx2
cdef uint32_t hash1, hash2
for length in lengths[:batch_size]:
for i in range(length):
hash1 = MurmurHash3_x86_32_uint64(keys[i], 0)
hash2 = MurmurHash3_x86_32_uint64(keys[i], 1)
idx1 = hash1 & (nr_weight-1)
idx2 = hash2 & (nr_weight-1)
if invalid_indexing:
idx1 = hash1 & (nr_weight-1)
idx2 = hash2 & (nr_weight-1)
else:
idx1 = hash1 % nr_weight
idx2 = hash2 % nr_weight
value = values[i]
for clas in range(nr_out):
scores[clas] += weights[idx1 + clas] * value
scores[clas] += weights[idx2 + clas] * value
if invalid_indexing:
scores[clas] += weights[idx1 + clas] * value
scores[clas] += weights[idx2 + clas] * value
else:
scores[clas] += weights[(clas * nr_weight) + idx1] * value
scores[clas] += weights[(clas * nr_weight) + idx2] * value
scores += nr_out
keys += length
values += length


cdef void set_gradientC(float* d_weights,
const uint64_t* keys, const float* values, const int32_t* lengths,
int batch_size, int nr_out,
const float* d_scores, int nr_weight) nogil:
int batch_size, int nr_out, const float* d_scores, int nr_weight,
bint invalid_indexing) nogil:
cdef uint32_t idx1, idx2
cdef uint32_t hash1, hash2
for length in lengths[:batch_size]:
for i in range(length):
hash1 = MurmurHash3_x86_32_uint64(keys[i], 0)
hash2 = MurmurHash3_x86_32_uint64(keys[i], 1)
idx1 = hash1 & (nr_weight-1)
idx2 = hash2 & (nr_weight-1)
if invalid_indexing:
idx1 = hash1 & (nr_weight-1)
idx2 = hash2 & (nr_weight-1)
else:
idx1 = hash1 % nr_weight
idx2 = hash2 % nr_weight
value = values[i]
for clas in range(nr_out):
d_weights[idx1 + clas] += d_scores[clas] * value
d_weights[idx2 + clas] += d_scores[clas] * value
if invalid_indexing:
d_weights[idx1 + clas] += d_scores[clas] * value
d_weights[idx2 + clas] += d_scores[clas] * value
else:
d_weights[(clas * nr_weight) + idx1] += d_scores[clas] * value
d_weights[(clas * nr_weight) + idx2] += d_scores[clas] * value
d_scores += nr_out
keys += length
values += length
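
The crux of the change is visible in `set_scoresC` and `set_gradientC` above: the v1 path maps a hash to a bucket with a bitmask, which only agrees with a true modulo when `nr_weight` is a power of two, and it offsets per class as `idx + clas`, which pins every class to the same low region of the weight table. The v2 path switches to `% nr_weight` and gives each class its own `nr_weight`-sized block. A quick check of the mask-versus-modulo point in plain Python (the hash value is arbitrary):

```python
h = 2654435769  # an arbitrary 32-bit hash value

# For a power-of-two table size, masking and taking a modulo agree:
assert h & (2 ** 18 - 1) == h % 2 ** 18

# For any other size they diverge, and masking leaves some buckets
# unreachable (a masked index can only have bits present in n - 1):
n = 300_000
print(h & (n - 1), h % n)  # prints two different bucket indices
```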
1 change: 1 addition & 0 deletions thinc/tests/layers/test_layers_api.py
@@ -128,6 +128,7 @@ def assert_data_match(Y, out_data):
# ("CauchySimilarity.v1", {}, (array2d, array2d), array1d),
("ParametricAttention.v1", {}, ragged, ragged),
("SparseLinear.v1", {}, (numpy.asarray([1, 2, 3], dtype="uint64"), array1d, numpy.asarray([1, 1], dtype="i")), array2d),
("SparseLinear.v2", {}, (numpy.asarray([1, 2, 3], dtype="uint64"), array1d, numpy.asarray([1, 1], dtype="i")), array2d),
("remap_ids.v1", {"dtype": "f"}, ["a", 1, 5.0], array2dint)
# fmt: on
]
36 changes: 36 additions & 0 deletions website/docs/api-layers.md
@@ -802,6 +802,42 @@ length, describing the concatenated batch of input features and their values.
The `lengths` array should have one entry per sequence in the batch, and the sum
of the lengths should equal the length of the keys and values array.

<infobox variant="warning">

`SparseLinear` should not be used for new models because it contains an indexing
bug. As a result, only a subset of the weights is used. Use
[`SparseLinear_v2`](#sparselinear_v2) instead.

</infobox>

| Argument | Type | Description |
| ----------- | --------------------------------------------------------- | -------------------------------------------------------- |
| `nO` | <tt>Optional[int]</tt> | The size of the output vectors. |
| `length` | <tt>int</tt> | The size of the weights vector, to be tuned empirically. |
| **RETURNS** | <tt>Model[Tuple[ArrayXd, ArrayXd, ArrayXd], ArrayXd]</tt> | The created layer. |

```python
https://github.com/explosion/thinc/blob/master/thinc/layers/sparselinear.pyx
```
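
To put a number on the warning above: `W` is allocated with `nO * length` entries, but the v1 kernel reads it as `weights[idx + clas]` with `idx < length` and `clas < nO`, so only the first `length + nO - 1` entries can ever be touched. A back-of-the-envelope sketch, with illustrative sizes:

```python
nO, length = 3, 2 ** 18

total = nO * length          # entries allocated for W
reachable = length + nO - 1  # entries the v1 indexing can reach
print(f"{reachable / total:.0%} of W is in use")  # 33% for nO=3
```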

### SparseLinear_v2 {#sparselinear_v2 tag="function"}

<inline-list>

- **Input:** <ndarray>Tuple[ArrayXd, ArrayXd, ArrayXd]</ndarray>
- **Output:** <ndarray>ArrayXd</ndarray>
- **Parameters:** <ndarray shape="nO*length,">W</ndarray>,
<ndarray shape="nO,">b</ndarray>, `length` <tt>int</tt>

</inline-list>

A sparse linear layer using the "hashing trick". Useful for tasks such as text
classification. Inputs to the layer should be a tuple of arrays
`(keys, values, lengths)`, where the `keys` and `values` are arrays of the same
length, describing the concatenated batch of input features and their values.
The `lengths` array should have one entry per sequence in the batch, and the sum
of the lengths should equal the length of the keys and values array.

| Argument | Type | Description |
| ----------- | --------------------------------------------------------- | -------------------------------------------------------- |
| `nO` | <tt>Optional[int]</tt> | The size of the output vectors. |
| `length` | <tt>int</tt> | The size of the weights vector, to be tuned empirically. |
| **RETURNS** | <tt>Model[Tuple[ArrayXd, ArrayXd, ArrayXd], ArrayXd]</tt> | The created layer. |
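
A minimal usage sketch, mirroring the input format of the new test case (the feature keys and values are made up for illustration):

```python
import numpy
from thinc.api import SparseLinear_v2

# Two sequences: the first contributes three hashed features, the second one.
keys = numpy.asarray([10, 20, 30, 40], dtype="uint64")
values = numpy.asarray([1.0, 1.0, 0.5, 1.0], dtype="f")
lengths = numpy.asarray([3, 1], dtype="i")

model = SparseLinear_v2(nO=2)
model.initialize()
scores = model.predict((keys, values, lengths))
print(scores.shape)  # (2, 2): one score vector per sequence
```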