# Установите библиотеку, если у вас ее нет
# !pip install scikit-learn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from typing import Tuple, List
from pandas import Series
from typing import List, Any, Tuple, Dict, Optional, Union
from pandas import DataFrame
from pandas.io.formats.style import Styler

# обратите внимание, что Scikit-Learn импортируется как sklearn
from sklearn import datasets
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_regression

# Fix the seed so that results are reproducible.
RANDOM_STATE = 42
# Dedicated generator for code that accepts an explicit RandomState.
rng = np.random.RandomState(RANDOM_STATE)
# Code that calls np.random.* directly draws from the global legacy
# generator, which `rng` does not affect, so seed that one as well.
np.random.seed(RANDOM_STATE)

# Download the MNIST dataset from OpenML.
mnist = fetch_openml("mnist_784")

plt.figure(figsize=(15, 3))
num_figures = 20

# Convert only the rows that are previewed, and do it once: converting the
# whole pixel table inside the loop repeats the same large copy every time.
preview_images = np.array(mnist["data"][:num_figures])

for i in range(num_figures):
    plt.subplot(2, 10, i + 1)
    # Draw the image itself (each row is a flattened 28x28 picture).
    plt.imshow(preview_images[i].reshape(28, 28), cmap="gray")
    # Show the true class label in the title.
    plt.title(f"Класс = {mnist['target'][i]}")
    plt.axis("off")
plt.show()

# Number of images in the dataset.
n_samples = len(mnist["data"])
# Feature matrix: one flattened image per row.
X = mnist["data"].to_numpy().reshape(n_samples, -1)
# Target vector with the class labels.
y = mnist["target"].to_numpy()

X.shape, y.shape

((70000, 784), (70000,))

# Keep 70% of the samples for training; the split is seeded with RANDOM_STATE.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=RANDOM_STATE
)

# Baseline that always predicts the most frequent class seen during fit.
dummy_clf = DummyClassifier(strategy="most_frequent")

dummy_clf.fit(X_train, y_train)

DummyClassifier(strategy='most_frequent')

# Inspect the fitted class priors and the corresponding class labels.
dummy_clf.class_prior_, dummy_clf.classes_

(array([0.09887755, 0.1125102 , 0.09912245, 0.10132653, 0.0997551 ,
        0.0897551 , 0.09771429, 0.10295918, 0.09863265, 0.09934694]),
 array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], dtype=object))

# Baseline predictions for the test set.
y_pred = dummy_clf.predict(X_test)

def get_random_image(
    X: np.ndarray,
    predicted_labels: np.ndarray,
    real_labels: np.ndarray
) -> Tuple[np.ndarray, int, int]:
    """
    Pick a random sample and return its image together with both labels.

    The index is drawn from the rows of ``X`` itself (not from any global
    array), and every row, including the first one, can be selected.

    Parameters
    ----------
    X : np.ndarray
        Matrix of flattened square images, shape (n_samples, side * side).
    predicted_labels : np.ndarray
        Labels predicted by the model, aligned with the rows of ``X``.
    real_labels : np.ndarray
        Ground-truth labels, aligned with the rows of ``X``.

    Returns
    -------
    random_digit_image : np.ndarray
        The selected row reshaped to (side, side), where
        ``side = int(sqrt(X.shape[1]))``.
    random_digit_label
        Predicted label of the selected sample, as stored in
        ``predicted_labels``.
    real_label
        True label of the selected sample, as stored in ``real_labels``.

    Raises
    ------
    ValueError
        If ``X`` has no rows (raised by ``np.random.randint``).
    """
    # Side length of the square image encoded in one row.
    side = int(np.sqrt(X.shape[1]))

    # Draw an index among all rows of X; the upper bound is exclusive.
    random_digit_number = np.random.randint(0, len(X))
    # Turn the feature vector back into a 2-D image.
    random_digit_image = X[random_digit_number].reshape(side, side)
    # Predicted label.
    random_digit_label = predicted_labels[random_digit_number]
    # True label.
    real_label = real_labels[random_digit_number]

    return random_digit_image, random_digit_label, real_label

plt.figure(figsize=(13, 2))

# One row of eight randomly chosen test images.
n_previews = 8
for position in range(1, n_previews + 1):
    plt.subplot(1, n_previews, position)
    image, predicted_label, real_label = get_random_image(X_test, y_pred, y_test)
    # Draw the image itself.
    plt.imshow(image, cmap="gray")
    # Put the predicted and the true label into the title.
    plt.title(f"predicted = {predicted_label} \n real = {real_label}")
    plt.axis("off")
plt.show()

# Share of test samples whose prediction matches the true label.
score = accuracy_score(y_test, y_pred)
print(f"метрика accuracy = {score*100:.2f}%")

метрика accuracy = 11.26%

# Baseline that guesses uniformly at random among the observed classes.
# random_state is fixed so the predictions are identical on every run.
dummy_clf = DummyClassifier(strategy="uniform", random_state=RANDOM_STATE)
dummy_clf.fit(X_train, y_train)

DummyClassifier(strategy='uniform')

# Evaluate the current dummy baseline on the test set.
y_pred = dummy_clf.predict(X_test)
score = accuracy_score(y_test, y_pred)
print(f"метрика accuracy = {score*100:.2f}%")

метрика accuracy = 9.92%

# Baseline that samples labels according to the empirical class priors.
# random_state is fixed so the predictions are identical on every run.
dummy_clf = DummyClassifier(strategy="stratified", random_state=RANDOM_STATE)
dummy_clf.fit(X_train, y_train)
y_pred = dummy_clf.predict(X_test)
score = accuracy_score(y_test, y_pred)
print(f"метрика accuracy = {score*100:.2f}%")

метрика accuracy = 9.78%

# Total pixel intensity ("ink mass") of every image: one value per row.
train_ink_mass = np.sum(X_train, axis=1)
test_ink_mass = np.sum(X_test, axis=1)

train_ink_mass.shape, test_ink_mass.shape

((49000,), (21000,))

plt.figure(figsize=(10, 4))

# Two overlaid histograms: one for zeros (red), one for ones (blue).
for digit, legend_label, bar_color in (
    ('0', 'Цифра 0', 'red'),
    ('1', 'Цифра 1', 'blue'),
):
    plt.hist(
        train_ink_mass[y_train == digit],
        bins=50,
        alpha=0.6,
        label=legend_label,
        color=bar_color,
    )

plt.title('Cумма пикселей для цифр 0 и 1')
plt.xlabel('Сумма яркости')
plt.ylabel('Количество картинок')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

plt.figure(figsize=(10, 4))

# Same plot, but with one histogram for every class present in y_train.
for digit in np.unique(y_train):
    digit_mask = y_train == digit
    plt.hist(train_ink_mass[digit_mask], bins=50, alpha=0.6, label=f'Цифра {digit}')

plt.title('Cумма пикселей для всех цифр')
plt.xlabel('Сумма яркости')
plt.ylabel('Количество картинок')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

def first_model(
    X: Union[np.ndarray, List[List[float]]], 
    threshold: float
) -> np.ndarray:
    """
    Baseline classifier: an intensity rule for '1', random labels otherwise.

    For every sample the intensity is the sum of its features. Samples whose
    intensity is strictly below ``threshold`` are labelled '1'; all other
    samples receive a random integer label from 0 to 9 drawn with
    ``np.random.randint``.

    Parameters
    ----------
    X : np.ndarray or list of lists of numbers
        Feature matrix of shape (n_samples, n_features). Nested lists are
        converted to an array first, so both input forms behave the same.
    threshold : float
        Intensity threshold; samples with ``sum(features) < threshold`` are
        classified as '1'.

    Returns
    -------
    np.ndarray
        Predicted labels as a NumPy array of strings, shape (n_samples,).
    """
    # Accept list-of-lists input as advertised: plain lists have neither
    # .sum(axis=...) nor .shape.
    X = np.asarray(X)

    intensity = X.sum(axis=1)

    n_samples = X.shape[0]
    # Random labels for everyone; the rule below overrides some of them.
    y_pred = np.random.randint(0, 10, size=n_samples)

    y_pred = np.where(intensity < threshold, 1, y_pred)

    return y_pred.astype(str)

# Apply the ink-mass rule: test images with a pixel sum below 14000 are labelled '1'.
y_pred_fast = first_model(X_test, threshold=14000)

score = accuracy_score(y_test, y_pred_fast)
print(f"метрика accuracy = {score*100:.2f}%")

метрика accuracy = 14.35%

def extract_quadrant_features(X: np.ndarray) -> np.ndarray:
    """
    Compute four quadrant features for flattened 28x28 images.

    Every image is cut into four 14x14 quadrants and the pixel values inside
    each quadrant are summed.

    Parameters
    ----------
    X : np.ndarray
        Flattened images; must be reshapeable to (n_samples, 28, 28),
        i.e. 784 values per sample.

    Returns
    -------
    np.ndarray
        Matrix of shape (n_samples, 4) with the columns
        0: top-left, 1: top-right, 2: bottom-left, 3: bottom-right.
    """
    # Restore the 2-D layout: (n_samples, height, width).
    pictures = X.reshape(-1, 28, 28)

    # Index ranges for the first half (top / left) and the second half
    # (bottom / right) of an axis.
    first_half = slice(None, 14)
    second_half = slice(14, None)
    halves = (first_half, second_half)

    # Rows vary slowest, so the resulting order is TL, TR, BL, BR.
    # axis=(1, 2) sums over the height and width of each quadrant.
    quadrant_sums = [
        pictures[:, rows, cols].sum(axis=(1, 2))
        for rows in halves
        for cols in halves
    ]

    # Put the four per-sample sums side by side: shape (n_samples, 4).
    return np.column_stack(quadrant_sums)

# Quadrant intensity sums for the training images (one row per image).
X_train_quadrants = extract_quadrant_features(X_train)
X_train_quadrants.shape

(49000, 4)

fig, axes = plt.subplots(2, 2, figsize=(11, 6))
axes = axes.flatten()

quadrants_names = ['Верх-Лево', 'Верх-Право', 'Низ-Лево', 'Низ-Право']

# The set of classes is the same for every subplot, so compute it once.
digits = np.unique(y_train)

for i, ax in enumerate(axes):
    name = quadrants_names[i]

    # One histogram per digit for the i-th quadrant feature.
    for digit in digits:
        ax.hist(X_train_quadrants[y_train == digit, i], bins=50, alpha=0.4, label=f'Цифра {digit}')

    # Axis decoration does not depend on the digit: do it once per subplot
    # instead of repeating it on every iteration of the inner loop.
    ax.set_title(f'Сектор: {name}')
    ax.set_xlabel('Сумма яркости')
    ax.set_ylabel('Количество картинок')
    ax.grid(True, alpha=0.3)

    if i == 0:  # Draw the legend on the first subplot only to avoid clutter.
        ax.legend(loc='upper right', fontsize='small', ncol=2)

plt.tight_layout()
plt.show()

def quadrant_model_vectorized(
    X: np.ndarray, 
    threshold_1: float = 1500, 
    threshold_2: float = 1500
) -> np.ndarray:
    """
    Vectorized rule-based classifier built on the quadrant features.

    Rules:
    - top-left quadrant sum < ``threshold_1``  -> '1'
    - top-left quadrant sum > ``threshold_1`` and bottom-left quadrant
      sum < ``threshold_2``  -> '7'
    - anything else -> a random label from 0 to 9

    Parameters
    ----------
    X : np.ndarray
        Flattened images in the layout expected by
        ``extract_quadrant_features``.
    threshold_1 : float, default=1500
        Threshold for the top-left quadrant sum.
    threshold_2 : float, default=1500
        Threshold for the bottom-left quadrant sum; only used by the '7' rule.

    Returns
    -------
    np.ndarray
        Predicted labels as strings, shape (n_samples,).
    """
    # Columns of the feature matrix: 0 = top-left, 2 = bottom-left.
    quadrants = extract_quadrant_features(X)
    top_left_ink = quadrants[:, 0]
    bottom_left_ink = quadrants[:, 2]

    # Rule for '1': little ink in the top-left quadrant.
    looks_like_one = top_left_ink < threshold_1
    # Rule for '7': a lot of ink top-left and little ink bottom-left.
    looks_like_seven = (top_left_ink > threshold_1) & (bottom_left_ink < threshold_2)

    # Start from random labels and overwrite the samples matched by a rule.
    # The '1' rule is written last so that it keeps the highest priority.
    labels = np.random.randint(0, 10, size=X.shape[0]).astype(str)
    labels[looks_like_seven] = '7'
    labels[looks_like_one] = '1'

    return labels

# Evaluate the quadrant rules with both thresholds set to 2000.
y_pred_quad = quadrant_model_vectorized(X_test, threshold_1=2000, threshold_2=2000)

score = accuracy_score(y_test, y_pred_quad)
print(f"метрика accuracy = {score*100:.2f}%")

метрика accuracy = 19.61%

def extract_center_feature(
    X: np.ndarray, 
    margin: int = 4
) -> np.ndarray:
    """
    Sum the pixel values inside a central square of flattened 28x28 images.

    Parameters
    ----------
    X : np.ndarray
        Flattened images; must be reshapeable to (n_samples, 28, 28),
        i.e. 784 values per sample.
    margin : int, default=4
        Half of the side of the central square. The square covers rows and
        columns ``14 - margin`` (inclusive) to ``14 + margin`` (exclusive),
        clipped to the image bounds, so ``margin >= 14`` selects the whole
        image. ``margin <= 0`` selects nothing and yields zeros.

    Returns
    -------
    np.ndarray
        Column vector of central-square sums, shape (n_samples, 1).
    """
    size = 28

    # Restore the 2-D layout: (n_samples, height, width).
    images = X.reshape(-1, size, size)

    # Slice bounds of the square, clipped to [0, size]. Without clipping a
    # margin above 14 gives a negative start, which Python interprets as
    # "count from the end" and silently selects the wrong region.
    center = size // 2
    start = min(max(center - margin, 0), size)
    end = min(max(center + margin, 0), size)

    # Cut out the centre and sum over its height and width.
    center_patch = images[:, start:end, start:end]
    center_mass = center_patch.sum(axis=(1, 2))

    return center_mass.reshape(-1, 1)

# Central-square intensity for the training images (default margin).
X_train_center = extract_center_feature(X_train)
X_train_center.shape

(49000, 1)

plt.figure(figsize=(10, 4))

# One histogram of the central-square sum per digit. Column 0 is selected
# explicitly so that hist receives 1-D data.
for digit in np.unique(y_train):
    plt.hist(X_train_center[y_train == digit, 0], bins=50, alpha=0.6, label=f'Цифра {digit}')

# The title names the central region: this figure shows the centre feature,
# not the whole-image pixel sum.
plt.title('Сумма пикселей в центре для всех цифр')
plt.xlabel('Сумма яркости')
plt.ylabel('Количество картинок')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

def center_hole_model(X: np.ndarray, threshold: float = 2000) -> np.ndarray:
    """
    Classifier that labels an image '0' when its centre is dark.

    The heuristic: a zero usually has a hole in the middle, so the pixel sum
    of the central square is low. Samples that do not match the rule get a
    random label from 0 to 9.

    Parameters
    ----------
    X : np.ndarray
        Flattened images in the layout expected by
        ``extract_center_feature``.
    threshold : float, default=2000
        Samples whose central-square sum (``margin=4``) is strictly below
        this value are labelled '0'.

    Returns
    -------
    np.ndarray
        Predicted labels as strings, shape (n_samples,).
    """
    # Central-square sum as a flat vector, one value per sample.
    center_ink = extract_center_feature(X, margin=4).flatten()

    # Random fallback labels for every sample.
    random_labels = np.random.randint(0, 10, size=X.shape[0]).astype(str)

    # Keep the random label unless the centre is darker than the threshold.
    return np.where(center_ink < threshold, '0', random_labels)

# Evaluate the centre-hole rule with a threshold of 3000.
# NOTE(review): the result reuses the name y_pred_quad from the quadrant model.
y_pred_quad = center_hole_model(X_test, threshold=3000)

score = accuracy_score(y_test, y_pred_quad)
print(f"метрика accuracy = {score*100:.2f}%")

метрика accuracy = 14.44%

def combined_rules_model(
    X: np.ndarray, 
    threshold_tl: float = 2000, 
    threshold_bl: float = 2000, 
    threshold_center: float = 3000
) -> np.ndarray:
    """
    Combined rule-based classifier for the digits 0, 1 and 7.

    Rules, in order of priority (the first matching rule wins):
    1. top-left quadrant sum < ``threshold_tl``  -> '1'
    2. central-square sum < ``threshold_center``  -> '0'
    3. top-left quadrant sum > ``threshold_tl`` and bottom-left quadrant
       sum < ``threshold_bl``  -> '7'
    4. no rule matched -> a random label from 0 to 9

    Parameters
    ----------
    X : np.ndarray
        Flattened images in the layout expected by
        ``extract_quadrant_features`` and ``extract_center_feature``.
    threshold_tl : float, default=2000
        Threshold for the top-left quadrant sum (rules for '1' and '7').
    threshold_bl : float, default=2000
        Threshold for the bottom-left quadrant sum (rule for '7').
    threshold_center : float, default=3000
        Threshold for the central-square sum with ``margin=4``
        (rule for '0').

    Returns
    -------
    np.ndarray
        Predicted labels as strings, shape (n_samples,).
    """
    # Quadrant sums: column 0 = top-left, column 2 = bottom-left.
    quadrants = extract_quadrant_features(X)
    top_left_ink = quadrants[:, 0]
    bottom_left_ink = quadrants[:, 2]

    # Rule for '1': little ink in the top-left quadrant.
    is_one = top_left_ink < threshold_tl
    # Rule for '7': a lot of ink top-left and little ink bottom-left.
    is_seven = (top_left_ink > threshold_tl) & (bottom_left_ink < threshold_bl)
    # Rule for '0': empty centre.
    center_ink = extract_center_feature(X, margin=4).flatten()
    is_zero = center_ink < threshold_center

    # Start from random labels, then apply the rules from the lowest priority
    # to the highest so that later assignments override earlier ones
    # ('1' beats '0', which beats '7').
    labels = np.random.randint(0, 10, size=X.shape[0]).astype(str)
    labels[is_seven] = '7'
    labels[is_zero] = '0'
    labels[is_one] = '1'

    return labels

# Evaluate the combined 1 / 0 / 7 rules on the test set.
y_pred_quad = combined_rules_model(X_test, threshold_tl=2000, threshold_bl=2000, threshold_center=3000)

score = accuracy_score(y_test, y_pred_quad)
print(f"метрика accuracy = {score*100:.2f}%")

метрика accuracy = 23.60%

# k-nearest-neighbours classifier with k=5 and a brute-force neighbour search.
model = KNeighborsClassifier(n_neighbors=5, algorithm="brute")

# Fit on the raw pixel features.
model.fit(X_train, y_train)

KNeighborsClassifier(algorithm='brute')

# Predict labels for the test set with the fitted k-NN model.
y_pred = model.predict(X_test)

# Peek at the shape and the first few predictions.
y_pred.shape, y_pred[:15]

((21000,),
 array(['8', '4', '8', '7', '7', '0', '6', '2', '7', '4', '3', '9', '9',
        '8', '2'], dtype=object))

plt.figure(figsize=(13, 6))

# A 3x8 grid of randomly chosen test images with the k-NN predictions.
n_rows, n_cols = 3, 8
for position in range(1, n_rows * n_cols + 1):
    plt.subplot(n_rows, n_cols, position)
    image, predicted_label, real_label = get_random_image(X_test, y_pred, y_test)
    # Draw the image itself.
    plt.imshow(image, cmap="gray")
    # Put the predicted and the true label into the title.
    plt.title(f"predicted = {predicted_label} \n real = {real_label}")
    plt.axis("off")
plt.show()

# Share of test samples whose prediction matches the true label.
score = accuracy_score(y_test, y_pred)
print(f"метрика accuracy = {score*100:.2f}%")

метрика accuracy = 96.84%

	strategy strategy: {"most_frequent", "prior", "stratified", "uniform", "constant"}, default="prior" Strategy to use to generate predictions. * "most_frequent": the `predict` method always returns the most frequent class label in the observed `y` argument passed to `fit`. The `predict_proba` method returns the matching one-hot encoded vector. * "prior": the `predict` method always returns the most frequent class label in the observed `y` argument passed to `fit` (like "most_frequent"). ``predict_proba`` always returns the empirical class distribution of `y` also known as the empirical class prior distribution. * "stratified": the `predict_proba` method randomly samples one-hot vectors from a multinomial distribution parametrized by the empirical class prior probabilities. The `predict` method returns the class label which got probability one in the one-hot vector of `predict_proba`. Each sampled row of both methods is therefore independent and identically distributed. * "uniform": generates predictions uniformly at random from the list of unique classes observed in `y`, i.e. each class has equal probability. * "constant": always predicts a constant label that is provided by the user. This is useful for metrics that evaluate a non-majority class. .. versionchanged:: 0.24 The default value of `strategy` has changed to "prior" in version 0.24.	'most_frequent'
	random_state random_state: int, RandomState instance or None, default=None Controls the randomness to generate the predictions when ``strategy='stratified'`` or ``strategy='uniform'``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary <random_state>`.	None
	constant constant: int or str or array-like of shape (n_outputs,), default=None The explicit constant as predicted by the "constant" strategy. This parameter is useful only for the "constant" strategy.	None

	strategy strategy: {"most_frequent", "prior", "stratified", "uniform", "constant"}, default="prior" Strategy to use to generate predictions. * "most_frequent": the `predict` method always returns the most frequent class label in the observed `y` argument passed to `fit`. The `predict_proba` method returns the matching one-hot encoded vector. * "prior": the `predict` method always returns the most frequent class label in the observed `y` argument passed to `fit` (like "most_frequent"). ``predict_proba`` always returns the empirical class distribution of `y` also known as the empirical class prior distribution. * "stratified": the `predict_proba` method randomly samples one-hot vectors from a multinomial distribution parametrized by the empirical class prior probabilities. The `predict` method returns the class label which got probability one in the one-hot vector of `predict_proba`. Each sampled row of both methods is therefore independent and identically distributed. * "uniform": generates predictions uniformly at random from the list of unique classes observed in `y`, i.e. each class has equal probability. * "constant": always predicts a constant label that is provided by the user. This is useful for metrics that evaluate a non-majority class. .. versionchanged:: 0.24 The default value of `strategy` has changed to "prior" in version 0.24.	'uniform'
	random_state random_state: int, RandomState instance or None, default=None Controls the randomness to generate the predictions when ``strategy='stratified'`` or ``strategy='uniform'``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary <random_state>`.	None
	constant constant: int or str or array-like of shape (n_outputs,), default=None The explicit constant as predicted by the "constant" strategy. This parameter is useful only for the "constant" strategy.	None

	n_neighbors n_neighbors: int, default=5 Number of neighbors to use by default for :meth:`kneighbors` queries.	5
	weights weights: {'uniform', 'distance'}, callable or None, default='uniform' Weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. Refer to the example entitled :ref:`sphx_glr_auto_examples_neighbors_plot_classification.py` showing the impact of the `weights` parameter on the decision boundary.	'uniform'
	algorithm algorithm: {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto' Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force.	'brute'
	leaf_size leaf_size: int, default=30 Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem.	30
	p p: float, default=2 Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. This parameter is expected to be positive.	2
	metric metric: str or callable, default='minkowski' Metric to use for distance computation. Default is "minkowski", which results in the standard Euclidean distance when p = 2. See the documentation of `scipy.spatial.distance `_ and the metrics listed in :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric values. If metric is "precomputed", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only "nonzero" elements may be considered neighbors. If metric is a callable function, it takes two arrays representing 1D vectors as inputs and must return one value indicating the distance between those vectors. This works for Scipy's metrics, but is less efficient than passing the metric name as a string.	'minkowski'
	metric_params metric_params: dict, default=None Additional keyword arguments for the metric function.	None
	n_jobs n_jobs: int, default=None The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details. Doesn't affect :meth:`fit` method.	None

Введение в анализ данных ¶

Модели и их обучение на примере распознавания рукописных цифр¶

1. Датасет MNIST¶

2. Теория о данных ⚙️¶

3. Обучение и применение моделей ⚙️¶

3.1. Виды методов обучения¶

4. Бейзлайн-модели¶

4.1. Стратегия "самый частый класс"¶

4.2. Визуализация результатов¶

4.3. Метрика и качество модели¶

4.4. Стратегия "полный рандом"¶

4.5. Стратегия "стратифицированный рандом"¶

5. Собственная модель¶

5.1. Первая модель¶

5.2. Попытка улучшения¶

5.3. Работаем дальше¶

5.4. Ну, пожалуйста, еще разочек!¶

6. Метод ближайших соседей¶

6.1. Построение модели¶

6.2. Анализ результатов¶

Введение в анализ данных¶

Модели и их обучение на примере распознавания рукописных цифр¶

1. Датасет MNIST¶

2. Теория о данных ⚙️¶

3. Обучение и применение моделей ⚙️¶

3.1. Виды методов обучения¶

4. Бейзлайн-модели¶

4.1. Стратегия "самый частый класс"¶

4.2. Визуализация результатов¶

4.3. Метрика и качество модели¶

4.4. Стратегия "полный рандом"¶

4.5. Стратегия "стратифицированный рандом"¶

5. Собственная модель¶

5.1. Первая модель¶

5.2. Попытка улучшения¶

5.3. Работаем дальше¶

5.4. Ну, пожалуйста, еще разочек!¶

6. Метод ближайших соседей¶

6.1. Построение модели¶

6.2. Анализ результатов¶

Введение в анализ данных ¶