Python Forum
Saving data into xlxs colomwise - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: Data Science (https://python-forum.io/forum-44.html)
+--- Thread: Saving data into xlxs colomwise (/thread-35134.html)



Saving data into xlxs colomwise - erdemath - Oct-02-2021

I generate different data-sets, and I want to save them into excel file in the form of columns. But, excel file always returns the data-sets row-wise.
Here's the code;

"""
Data Generator for Multiple Use;
"""

import numpy as np
import pandas as pd
import random
import xlsxwriter
import math
from numpy import linalg as LA
import matplotlib.pyplot as plt
import matplotlib.colors
from scipy.stats import poisson
from scipy.stats import bernoulli
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from tqdm import tqdm_notebook
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import make_blobs

class DataGenerator:

    """
    Pre-defined Features
    """

    def __init__(self):
        self.LengthSample = 100
        self.mu = 0.1
        self.sigma = 0.5
        self.data_set_no = 3
        self.PoissonPar = 3
        self.BenoulliPar = 0.6
        self.DataIdx = ['Gaussian', 'Bernoulli', 'Poisson']
        self.PercNo_Layer = np.asarray([200, 1000, 300, 400, 200])
        self.Active_Layer = np.asarray([10/100*self.PercNo_Layer[0], self.PercNo_Layer[1], 3/100*self.PercNo_Layer[2], 3/100*self.PercNo_Layer[3], 10/100*self.PercNo_Layer[4]])

    # ==============================================================================================================#

    """
   Data set; 
       Mixture of python blobs, Bernoulli, Gaussian, Poisson data distribution
   """

    def GenerateData(self):

        # Usual python blobs
        data_blobs, _ = make_blobs(n_samples=self.LengthSample, centers=1, n_features=int(self.Active_Layer[0]),
                                   cluster_std=5.0, random_state=1000)

        data_blobs = data_blobs.transpose()

        # Some Poisson distribution
        data_Poisson = np.asarray(
            [poisson.rvs(mu=self.PoissonPar, size=self.LengthSample) for _ in
             range(int(self.Active_Layer[0]))]).reshape(
            int(self.LengthSample), int(self.Active_Layer[0]))

        data_Poisson = data_Poisson.transpose()

        # Some Gaussian distribution
        data_Gaussian = np.asarray([np.random.normal(self.mu, self.sigma, size=self.LengthSample) for _ in
                                    range(int(self.Active_Layer[0]))]).reshape(int(self.LengthSample),
                                                                               int(self.Active_Layer[0]))

        data_Gaussian = data_Gaussian.transpose()

        # Some Bernoulli
        data_bern = np.asarray(
            [bernoulli.rvs(size=self.LengthSample, p=self.BenoulliPar) for _ in
             range(int(self.Active_Layer[0]))]).reshape(
            int(self.LengthSample), int(self.Active_Layer[0]))

        data_bern = data_bern.transpose()

        return data_blobs, data_Gaussian, data_bern, data_Poisson

    def Save2Xls(self):

        # Call the data
        data_blobs, data_Gaussian, data_bern, data_Poisson = self.GenerateData()

        # Individual data frames per data-set
        data_Poisson_df = pd.DataFrame({'Poisson': [data_Poisson]})
        data_Gaussian_df = pd.DataFrame({'Gaussian': [data_Gaussian]})
        data_blobs_df = pd.DataFrame({'Blobs': [data_blobs]})
        data_Bernoulli_df = pd.DataFrame({'Bernoulli': [data_bern]})

        data_sheets = {'Blobs':data_blobs_df, 'Poisson':data_Poisson_df, 'Gaussian':data_Gaussian_df, 'Bernoulli':data_Bernoulli_df}

        data_sets_ind = pd.ExcelWriter('./DataSets.xlsx', engine='xlsxwriter')

        for sheet_name in data_sheets.keys(): #
            data_sheets[sheet_name].to_excel(data_sets_ind, sheet_name=sheet_name)
        data_sets_ind.save()

        # Dictionary of data-sets
        data_dictionary = pd.DataFrame({'Blobs':[data_blobs], 'Poisson':[data_Poisson], 'Gaussian':[data_Gaussian], 'Bernoulli':[data_bern]})
        data_dictionary.to_excel('./data_dictionary.xlsx', sheet_name='Data dictionary', index=False)

        return data_dictionary, data_sets_ind

if __name__ == '__main__':
    runner = DataGenerator()
    runner.Save2Xls()