Thursday, July 29, 2021

Operations on Armadillo matrices via NumPy array and Python Extension to C++

The core test converts a 5x5 NumPy array into an Armadillo C++ matrix (myMat), inverts it into a matrix C, and verifies that myMat * C = E (the identity matrix); after that we can safely return matrix C to the Python runtime module.

I was able to set up armadillo-10.6.1 on Fedora 34 only by building it from source, after first installing the following packages:

$ sudo dnf install cmake openblas-devel lapack-devel  \

   arpack-devel SuperLU-devel

$ tar -xvf armadillo-10.6.1.tar.xz 

$ cd armadillo-10.6.1

$ ./configure

$ make

$ sudo make install

Here we follow [1]; all updates were colored in blue inside the code.

(.env) [boris@fedora33server NUMPYCPP]$ cat examplePlus.cpp

#define NPY_NO_DEPRECATED_API NPY_1_9_API_VERSION

extern "C" {

    #include <Python.h>

    #include <numpy/arrayobject.h>

}

#include <exception>

#include <cassert>

#include <string>

#include <type_traits>

#include <map>

#include <vector>

#include <armadillo>

/* Thrown when a NumPy array does not have the shape the caller asked for.
 *
 * The message is a string literal rather than a std::string member, so copying the
 * exception object can never throw.
 */
class WrongDimensions : public std::exception
{
public:
    WrongDimensions() = default;

    /* Returns the fixed, human-readable description of the error. */
    const char* what() const noexcept override { return "The dimensions were incorrect"; }
};

/* Thrown when a conversion is requested for an element type that is not supported.
 *
 * The message is a string literal rather than a std::string member, so copying the
 * exception object can never throw.
 */
class NotImplemented : public std::exception
{
public:
    NotImplemented() = default;

    /* Returns the fixed, human-readable description of the error. */
    const char* what() const noexcept override { return "Not implemented"; }
};

/* Error type whose message reports that a matrix was not contiguous.
 *
 * The message is a string literal rather than a std::string member, so copying the
 * exception object can never throw.
 */
class BadArrayLayout : public std::exception
{
public:
    BadArrayLayout() = default;

    /* Returns the fixed, human-readable description of the error. */
    const char* what() const noexcept override { return "The matrix was not contiguous"; }
};

/* Returns the shape of the given NumPy array as a vector holding one extent per
 * dimension, in the order NumPy reports them.
 */
static const std::vector<npy_intp> getPyArrayDimensions(PyArrayObject* pyarr)
{
    const npy_intp rank = PyArray_NDIM(pyarr);
    npy_intp* shape = PyArray_SHAPE(pyarr);

    // Copy the first `rank` extents straight out of the shape buffer.
    return std::vector<npy_intp>(shape, shape + rank);
}

/* Checks the dimensions of the given array. Pass -1 for either dimension to say you don't
 * care what the size is in that dimension. Pass dimensions (X, 1) for a vector.
 *
 * Arrays that are neither 1-D nor 2-D never match and yield false. This is an explicit
 * check rather than an assert() so that it still protects the dims[0] / dims[1] accesses
 * when the module is compiled with NDEBUG, which turns assert() into a no-op.
 */
static bool checkPyArrayDimensions(PyArrayObject* pyarr, const npy_intp dim0, const npy_intp dim1)
{
    const auto dims = getPyArrayDimensions(pyarr);
    if (dims.empty() || dims.size() > 2) {
        return false;
    }

    const bool rowsMatch = (dim0 == -1) || (dims[0] == dim0);

    // A 1-D array is treated as a column vector, i.e. as if its second extent were 1.
    const npy_intp actualCols = (dims.size() == 1) ? 1 : dims[1];
    const bool colsMatch = (dim1 == -1) || (actualCols == dim1);

    return rowsMatch && colsMatch;
}

template<typename outT>

static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)

{

    if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();

    int arrTypeCode;

    if (std::is_same<outT, uint16_t>::value) {

        arrTypeCode = NPY_UINT16;

    }

    else if (std::is_same<outT, double>::value) {

        arrTypeCode = NPY_DOUBLE;

    }

    else {

        throw NotImplemented();

    }

    const auto dims = getPyArrayDimensions(pyarr);

    if (dims.size() == 1) {

        outT* dataPtr = static_cast<outT*>(PyArray_DATA(pyarr));

        return arma::Col<outT>(dataPtr, dims[0], true);

    }

    else {

PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);

        if (reqDescr == NULL) throw std::bad_alloc();

PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);

        if (cleanArr == NULL) throw std::bad_alloc();

        reqDescr = NULL;  

outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));

        arma::Mat<outT> result (dataPtr, dims[0], dims[1], true);  

        Py_DECREF(cleanArr);

        return result;

    }

}

/* Builds a new NumPy array of doubles holding a copy of the given Armadillo matrix.
 *
 * A column vector becomes a 1-D array; any other matrix becomes a 2-D array of shape
 * (n_rows, n_cols). Returns a new reference. Throws std::bad_alloc if the array cannot
 * be created.
 */
static PyObject* convertArmaToPyArray(const arma::mat& matrix)
{
    const npy_intp rowCount = static_cast<npy_intp>(matrix.n_rows);  // NOTE: This narrows the integer
    const npy_intp colCount = static_cast<npy_intp>(matrix.n_cols);
    npy_intp shape[2] = {rowCount, colCount};
    const npy_intp rank = matrix.is_colvec() ? 1 : 2;

    PyObject* pyResult = PyArray_SimpleNew(rank, shape, NPY_DOUBLE);
    if (pyResult == NULL) {
        throw std::bad_alloc();
    }

    // Fill the output buffer sequentially in row-major order: row 0 first, then row 1, ...
    double* cursor = static_cast<double*>(PyArray_DATA((PyArrayObject*)pyResult));
    for (int row = 0; row < rowCount; row++) {
        for (int col = 0; col < colCount; col++) {
            *cursor = matrix(row, col);
            ++cursor;
        }
    }

    return pyResult;
}

extern "C" {

    static PyObject* example_testFunction(PyObject* self, PyObject* args)

    {   

        int flag = 1;

        PyArrayObject* myArray = NULL;

        if (!PyArg_ParseTuple(args, "iO!", &flag, &PyArray_Type, &myArray)) {

            return NULL;

        }

PyObject* output = NULL;

try {

    arma::mat myMat = convertPyArrayToArma<double>(myArray, -1, -1);

            arma::mat A = {{ 1, 2, 3, 2, 4},

                           { 2, 3, 4, 1, 5},

                           { 3, 4, 5, 7, 1},

                           { 4, 5, 6, 1, 2},

                           { 5, 6, 7, 3, 1}};

            arma::mat myOut;

            if ( flag == 1) {

                myOut =  myMat * A ;

            }

            if ( flag == 2) {

                myOut = arma::inv(myMat);

            }

            if (flag == 0)  {

                arma::mat C= arma::inv(myMat);

                myOut =  myMat * C ;

            }

           output = convertArmaToPyArray(myOut);

        }

        catch (const std::bad_alloc&) {

            PyErr_NoMemory();

            Py_XDECREF(output);

            return NULL;

        }

        catch (const std::exception& err) {

            PyErr_SetString(PyExc_RuntimeError, err.what());

            Py_XDECREF(output);

            return NULL;

        }

        return output;

    }

    /* Method table of the module: exposes example_testFunction to Python under the name
     * "test_function", called with positional arguments (METH_VARARGS).
     */
    static PyMethodDef example_methods[] =

    {

        {"test_function", example_testFunction, METH_VARARGS, "A test function"},

        /* All-NULL entry marking the end of the table. */
        {NULL, NULL, 0, NULL}

    };

    /* Module definition passed to PyModule_Create() in PyInit_example(); it ties the module
     * name "example" to the method table example_methods.
     */
    static struct PyModuleDef example_module = {

       PyModuleDef_HEAD_INIT,

       "example",   /* name of module */

       NULL, /* module documentation, may be NULL */

       -1,       /* size of per-interpreter state of the module,

                    or -1 if the module keeps state in global variables. */

       example_methods

    };

    PyMODINIT_FUNC

    PyInit_example(void)

    {

        import_array();

        PyObject* m = PyModule_Create(&example_module);

        if (m == NULL) return NULL;

        return m;

    }

}

(.env) [boris@fedora33server NUMPYCPP]$ cat setup.py
from setuptools import setup, Extension
import numpy as np

# Build description of the C++ extension module `example`:
#   - compiled from examplePlus.cpp as C++11,
#   - NumPy headers come from np.get_include(),
#   - linked against the `armadillo` library.
# NOTE(review): /usr/local/include and /usr/local/lib are presumably where the source
# build of Armadillo (`sudo make install`) placed its files -- adjust if it lives elsewhere.
example_module = Extension(
    'example',
    include_dirs=[np.get_include(), '/usr/local/include'],
    libraries=['armadillo'],
    library_dirs=['/usr/local/lib'],
    sources=['examplePlus.cpp'],
    language='c++',
    extra_compile_args=['-std=c++11']
    )

# Register the extension so that `python setup.py install` builds and installs it.
setup(name='example',
      ext_modules=[example_module],
      )

****************************
Now build and install
************************************

(.env) [boris@fedora33server NUMPYCPP]$ python setup.py install
running install
running bdist_egg
running egg_info
writing example.egg-info/PKG-INFO
writing dependency_links to example.egg-info/dependency_links.txt
writing top-level names to example.egg-info/top_level.txt
reading manifest file 'example.egg-info/SOURCES.txt'
writing manifest file 'example.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_ext
building 'example' extension
creating build
creating build/temp.linux-x86_64-3.9
gcc -pthread -Wno-unused-result -Wsign-compare -DDYNAMIC_ANNOTATIONS_ENABLED=1 -DNDEBUG -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -fPIC -I/home/boris/NUMPYCPP/.env/lib64/python3.9/site-packages/numpy/core/include -I/usr/local/include -I/home/boris/NUMPYCPP/.env/include -I/usr/include/python3.9 -c examplePlus.cpp -o build/temp.linux-x86_64-3.9/examplePlus.o -std=c++11
creating build/lib.linux-x86_64-3.9
g++ -pthread -shared -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g build/temp.linux-x86_64-3.9/examplePlus.o -L/usr/local/lib -L/usr/lib64 -larmadillo -o build/lib.linux-x86_64-3.9/example.cpython-39-x86_64-linux-gnu.so
creating build/bdist.linux-x86_64
creating build/bdist.linux-x86_64/egg
copying build/lib.linux-x86_64-3.9/example.cpython-39-x86_64-linux-gnu.so -> build/bdist.linux-x86_64/egg
creating stub loader for example.cpython-39-x86_64-linux-gnu.so
byte-compiling build/bdist.linux-x86_64/egg/example.py to example.cpython-39.pyc
creating build/bdist.linux-x86_64/egg/EGG-INFO
copying example.egg-info/PKG-INFO -> build/bdist.linux-x86_64/egg/EGG-INFO
copying example.egg-info/SOURCES.txt -> build/bdist.linux-x86_64/egg/EGG-INFO
copying example.egg-info/dependency_links.txt -> build/bdist.linux-x86_64/egg/EGG-INFO
copying example.egg-info/top_level.txt -> build/bdist.linux-x86_64/egg/EGG-INFO
writing build/bdist.linux-x86_64/egg/EGG-INFO/native_libs.txt
zip_safe flag not set; analyzing archive contents...
__pycache__.example.cpython-39: module references __file__
creating 'dist/example-0.0.0-py3.9-linux-x86_64.egg' and adding 'build/bdist.linux-x86_64/egg' to it
removing 'build/bdist.linux-x86_64/egg' (and everything under it)
Processing example-0.0.0-py3.9-linux-x86_64.egg
removing '/home/boris/NUMPYCPP/.env/lib/python3.9/site-packages/example-0.0.0-py3.9-linux-x86_64.egg' (and everything under it)
creating /home/boris/NUMPYCPP/.env/lib/python3.9/site-packages/example-0.0.0-py3.9-linux-x86_64.egg
Extracting example-0.0.0-py3.9-linux-x86_64.egg to /home/boris/NUMPYCPP/.env/lib/python3.9/site-packages
example 0.0.0 is already the active version in easy-install.pth

Installed /home/boris/NUMPYCPP/.env/lib/python3.9/site-packages/example-0.0.0-py3.9-linux-x86_64.egg
Processing dependencies for example==0.0.0
Finished processing dependencies for example==0.0.0

(.env) [boris@fedora33server NUMPYCPP]$ cat  MyProg.py
import numpy as np
import example

# Operation selector passed to example.test_function:
#   1 - multiply the matrix below by the 5x5 matrix hard-coded in the extension,
#   2 - invert the matrix,
#   0 - invert the matrix and multiply it by its inverse (verification).
flg = int(input("Enter 1 to multiply matrices or 2 to inverse or 0 to verify : "))
lst = []
a = np.array([[1, 2, 3, 4, 5],
              [2, 8, 4, 1, 3],
              [5, 4, 1, 5, 2],
              [4, 3, 2, 7, 1],
              [3, 4, 6, 1, 2]], dtype='float64')
# All three modes use the same call; the flag selects the operation on the C++ side.
# Any other value leaves `lst` empty, so nothing is printed.
if flg in (0, 1, 2):
    lst = example.test_function(flg, a)
# print() accepts `sep`; the former `separator=` keyword raised a TypeError.
print(*lst, sep="\n")




REFERENCES


Saturday, July 24, 2021

Assembly of Python External C++ procedure returning the vector of objects of string type

 This post is an immediate follow-up to the most recent post at Lxer.com about returning a single string. Consider the task that was already solved with a 2D vector whose content was dumped to a disk file. The approach below solves the same task by straightforwardly returning a vector of strings from the C++ procedure to the Python module.

Remind the task itself 

Write a program that searches among the integers that belong to the numerical segment [174457;174505], numbers that have exactly two different natural divisors, not counting the unit and the number itself. For each of the found number, write these two divisors into the table on the screen with a new lines in ascending order of the product of these two divisors. Divisors in the table row must also follow in ascending order.

We intend to assemble a Python external C++ procedure that uses a vector of strings to solve the task mentioned above for the segment [1744457;3044505] without the significant performance penalty that a pure Python module would incur at runtime.

In fact, this is an alternative way to get a solution without 2D vector which was suggested  in  http://lxer.com/module/newswire/view/302902/index.html

C++ procedure requires just 4 seconds to scan segment [1744457;3044505]


(.env) [boris@fedora33server STRINGVEC]$ cat procVector.h
#pragma once
#include <iostream>
#include <vector>
#include <cstddef>
#include <string>

namespace abc {

// Scans the integers from m to n (both inclusive) and returns one string per number that
// qualifies, formatted as "<divisor> <number / divisor>". Defined in procVector.cpp.
std::vector<std::string>  resultList(int m, int n);

}
(.env) [boris@fedora33server STRINGVEC]$ cat procVector.cpp
#include <iostream>
#include <vector>
#include <cstddef>
#include <cmath>
#include <string>
#pragma GCC diagnostic ignored "-Wsign-compare"

namespace abc {

/* Scans the integers in [m, n] (both inclusive) and reports those that have exactly one
 * divisor d with 2 <= d <= sqrt(i), provided that d differs from i / d.
 *
 * Returns one string per reported number, in ascending order of the number, formatted as
 * "<d> <i / d>". The result is empty when m > n or when no number in the range qualifies.
 */
std::vector<std::string> resultList(int m, int n)
{
    std::vector<std::string> rows;
    const std::string blank = " ";

    // A wider counter keeps `value <= n` from overflowing when n is the largest int.
    for (long long value = m; value <= n; ++value) {
        const int i = static_cast<int>(value);
        int hits = 0;      // divisors found so far in [2, sqrt(i)]
        int divisor = 0;   // the most recently found one

        // `j <= i / j` is the integer form of `j <= sqrt(i)`: it needs no floating point,
        // is simply false for i < 4 (including negative i), and cannot overflow.
        // The search stops as soon as a second divisor shows up.
        for (int j = 2; hits <= 1 && j <= i / j; ++j) {
            if (i % j == 0) {
                divisor = j;
                ++hits;
            }
        }

        if (hits == 1 && divisor != i / divisor) {
            rows.push_back(std::to_string(divisor) + blank + std::to_string(i / divisor));
        }
    }
    return rows;
}

} //namespace

The core solution uses the same operations and API call as for a single string, but now performed inside a loop, as shown below. PyList_New() and PyList_SetItem() work in this case just as they did for a vector of integers.

 std::vector<std::string>rez = abc::resultList(m1,n1);
 PyObject* result = PyList_New(rez.size());
 for(int i = 0; i < rez.size(); i++) {
 PyList_SetItem(result,i,PyUnicode_FromString(rez[i].c_str()));   }

where we again rely on PyUnicode_FromString(rez[i].c_str())

(.env) [boris@fedora33server STRINGVEC]$ cat mainVector.cpp
#include <Python.h>
#include <iostream>
#include <vector>
#include <cstddef>
#include <cstdlib>
#include <string>
#include "procVector.h"
#pragma GCC diagnostic ignored "-Wsign-compare"

extern "C"{}

namespace {

static PyObject *resList(PyObject* self, PyObject* args)

{
 int m1,n1;
 if(!PyArg_ParseTuple(args,"ii",&m1,&n1))
        return NULL;

 std::vector<std::string>rez = abc::resultList(m1,n1);
 PyObject* result = PyList_New(rez.size());
 for(int i = 0; i < rez.size(); i++) {
 PyList_SetItem(result,i,PyUnicode_FromString(rez[i].c_str()));   }
  return result;
};

static PyMethodDef myMethodsTable[] = {

    { "resList", resList, METH_VARARGS, "Tracking DEMO VAR #25" },
    { NULL, NULL, 0, NULL }

};

static struct PyModuleDef myModule = {
    PyModuleDef_HEAD_INIT,
    "myModule",
    "Test Module",
    -1,
    myMethodsTable
};

PyMODINIT_FUNC PyInit_myModule(void)
{
    return PyModule_Create(&myModule);
};
} //namespace

(.env) [boris@sever33fedora STRINGVEC]$ cat setup.py
from distutils.core import setup, Extension
import sysconfig
# NOTE(review): `language`, `std`, `default_compile_args` and `extra_compile_args` are
# computed here but never passed to Extension() below, so they have no effect on the build
# (the compiler command lines in the build log contain neither -std=c++20 nor -O3).
language = 'c++'
std = 'c++20'
default_compile_args = sysconfig.get_config_var('CFLAGS').split()
extra_compile_args = [f"-std={std}", "-Wall", "-Wextra", "-Werror", "-DNDEBUG", "-O3"]
# Build the extension module `myModule` from the two C++ sources.
setup(name = 'myModule', version = '1.0',  \
  ext_modules = [Extension('myModule', ['mainVector.cpp','procVector.cpp'])])

Now build and install

(.env) [boris@fedora33server STRINGVEC]$ python setup.py install
running install
running build
running build_ext
building 'myModule' extension
creating build
creating build/temp.linux-x86_64-3.9
gcc -pthread -Wno-unused-result -Wsign-compare -DDYNAMIC_ANNOTATIONS_ENABLED=1 -DNDEBUG -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -fPIC -I/home/boris/STRINGVEC/.env/include -I/usr/include/python3.9 -c mainVector.cpp -o build/temp.linux-x86_64-3.9/mainVector.o
gcc -pthread -Wno-unused-result -Wsign-compare -DDYNAMIC_ANNOTATIONS_ENABLED=1 -DNDEBUG -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -fPIC -I/home/boris/STRINGVEC/.env/include -I/usr/include/python3.9 -c procVector.cpp -o build/temp.linux-x86_64-3.9/procVector.o
creating build/lib.linux-x86_64-3.9
g++ -pthread -shared -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g build/temp.linux-x86_64-3.9/mainVector.o build/temp.linux-x86_64-3.9/procVector.o -L/usr/lib64 -o build/lib.linux-x86_64-3.9/myModule.cpython-39-x86_64-linux-gnu.so
running install_lib
copying build/lib.linux-x86_64-3.9/myModule.cpython-39-x86_64-linux-gnu.so -> /home/boris/STRINGVEC/.env/lib64/python3.9/site-packages
running install_egg_info
Removing /home/boris/STRINGVEC/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info
Writing /home/boris/STRINGVEC/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info

(.env) [boris@fedora33server STRINGVEC]$ cat MyString.py
import myModule
# Bounds of the segment to scan (both are passed to the extension as plain ints).
# The commented-out lines are alternative ways to choose them: interactively, or the
# smaller segment from the original task.
# a = int(input("Input start   number : "))
# b = int(input("Input finish  number : "))
a = 1744457
b = 3044505
# a = 174457
# b = 174505
# resList returns a list of strings; print them one per line.
lst = myModule.resList(a,b)
print(*lst, sep = "\n")

(.env) [boris@fedora33server STRINGVEC]$ cat speed.sh
date
python MyString.py
date

(.env) [boris@fedora33server STRINGVEC]$ ./speed.sh | tee log
(.env) [boris@fedora33server STRINGVEC]$ head -4 log
Sat Jul 24 03:45:30 PM MSK 2021
643 2713
2 872231
233 7487
(.env) [boris@fedora33server STRINGVEC]$ tail -4 log
2 1522249
3 1014833
199 15299
Sat Jul 24 03:45:34 PM MSK 2021

























Friday, July 23, 2021

Assembly of Python External C++ procedure returning the value of string type

In the C++ procedure below we obtain the final answer as a C++ string, convert that string to a pointer (say c) to "const char", and finally return the required value to the Python runtime module via the pointer to PyObject provided by PyUnicode_FromString(c).

    std::string answer = maxChar + std::to_string(maxFreq);
    const char *ars = answer.c_str();
    PyObject *result = PyUnicode_FromString(ars);
    return result;

Original task sounds like ( "Eugene Djobs" Python online trainer)

The text file contains only capital letters of the Latin alphabet (ABC ... Z). Identify the most common character in the file immediately after the letter X. In the answer, write down this symbol first, and then immediately (no separator) how many times it occurs after the letter X.

Python Code for this task 

# Reference pure-Python solution: for each line of the file, find the letter that most
# often appears immediately after 'X'.
with open('24-B251220.txt') as f:

  lines = f.readlines()

  for line in lines:

     # The counters are re-initialised for every line, so the values printed at the end
     # describe the last line of the file only.
     letters = 'QWERTYUIOPASDFGHJKLZXCVBNM'

     count = [0]*26

     maxChar, maxFreq = ' ', 0

     # One full pass over the line per candidate letter (26 passes in total).
     for i in range(26):

        for j in range(1, len(line)):

           if line[j-1] == 'X' and line[j] == letters[i]:

              count[i] += 1

     # Pick the highest count; on a tie the letter that comes first in `letters` wins.
     for i in range(26):

       if  maxFreq < count[i]:

        maxFreq = count[i]

        maxChar = letters[i]

# Prints the letter and its count separated by a space.
print(maxChar,maxFreq)

where 24-B251220.txt is just a row of length 1000000 bytes

***********************************************
If we append 24-B251220.txt to 24-C251220.txt 50 times (24-B251220.txt >> 24-C251220.txt), then we have to wait a while (around 3.5 min) for the Python module above to exit
***********************************************
Create following test.cpp file for Python to C++ extension :

#include <iostream>
#include <fstream>
#include <string>
#include <cstddef>
#include <Python.h>
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wreturn-type"

extern "C"{}
namespace {

static PyObject* XFrecuencyLetter(PyObject* self, PyObject* args)
{
   char *filename = NULL;
   int idx = 0;

   if(!PyArg_ParseTuple(args, "is", &idx, &filename)) {
        return NULL;
    }

   std::string letters = "QWERTYUIOPASDFGHJKLZXCVBNM";
   char alpha[27] =  "QWERTYUIOPASDFGHJKLZXCVBNM";
   int count [26] = {};
   int maxFreq = 0;
   char maxChar;
   std::fstream newfile;
   newfile.open(filename,std::ios::in);

   if (newfile.is_open()){
   std::string tp;
    while (getline(newfile, tp))
     {
      for(int i=1; i < 26;i++)
      {
       for(int j=1; j <= tp.size(); j++)
        {
          if ( tp.at(j-1) == alpha[idx]  && tp.at(j) == letters.at(i))
              count[i] += 1;
        }
      }
      for(int i=1; i < 26;i++)
      {
        if (maxFreq < count[i])
         {
          maxFreq = count[i];
          maxChar = letters[i];
         }
      }
     }
    std::string answer = maxChar + std::to_string(maxFreq);
    const char *ars = answer.c_str();
    PyObject *result = PyUnicode_FromString(ars);
    newfile.close();
    return result;
   }
};

static PyMethodDef myMethods[] = {
    { "XFrecuencyLetter",XFrecuencyLetter,METH_VARARGS, "Prints frequency letter" },
    { NULL, NULL, 0, NULL }
};

static struct PyModuleDef myModule = {
    PyModuleDef_HEAD_INIT,
    "myModule",
    "Test Module",
    -1,
    myMethods
};

PyMODINIT_FUNC PyInit_myModule(void)
{
    return PyModule_Create(&myModule);
};
} //namespace


Now create MyAlpha.py as follows, to be able to analyze the question for any letter other than 'X' as well

(.env) [boris@fedora33server DJS24251220]$ cat MyAlpha.py
import myModule
Z = input("Input Letter you are concerned about : ")
# The letter order must match the table inside the C++ extension, because only the
# position of the chosen letter is passed across. (The entry for 'U' used to be the
# typo ',U', which made the letter U impossible to select.)
alpha = list('QWERTYUIOPASDFGHJKLZXCVBNM')
# Raises ValueError if the input is not exactly one of the 26 capital letters.
index = alpha.index(Z)
file = input("Input filename : " )
print(myModule.XFrecuencyLetter(index,file))

[boris@fedora33server DJS24251220]$ cat setup.py
from distutils.core import setup, Extension
import sysconfig
# NOTE(review): `language`, `std`, `default_compile_args` and `extra_compile_args` are
# computed here but never passed to Extension() below, so they have no effect on the build
# (the compiler command line in the build log contains neither -std=c++20 nor -O3).
language = 'c++'
std = 'c++20'
default_compile_args = sysconfig.get_config_var('CFLAGS').split()
extra_compile_args = [f"-std={std}", "-Wall", "-Wextra", "-Werror", "-DNDEBUG", "-O3"]
# Build the extension module `myModule` from test.cpp.
setup(name = 'myModule', version = '1.0',  \
   ext_modules = [Extension('myModule', ['test.cpp'])])

******************************************
Build and install required shared library
******************************************
(.env) [boris@fedora33server DJS24251220]$ python setup.py install
running install
running build
running build_ext
building 'myModule' extension
creating build
creating build/temp.linux-x86_64-3.9
gcc -pthread -Wno-unused-result -Wsign-compare -DDYNAMIC_ANNOTATIONS_ENABLED=1 -DNDEBUG -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -fPIC -I/home/boris/DJS24251220/.env/include -I/usr/include/python3.9 -c test.cpp -o build/temp.linux-x86_64-3.9/test.o
creating build/lib.linux-x86_64-3.9
g++ -pthread -shared -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g build/temp.linux-x86_64-3.9/test.o -L/usr/lib64 -o build/lib.linux-x86_64-3.9/myModule.cpython-39-x86_64-linux-gnu.so
running install_lib
copying build/lib.linux-x86_64-3.9/myModule.cpython-39-x86_64-linux-gnu.so -> /home/boris/DJS24251220/.env/lib64/python3.9/site-packages
running install_egg_info
Removing /home/boris/DJS24251220/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info
Writing /home/boris/DJS24251220/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info

(.env) [boris@fedora33server DJS24251220]$ ll *.txt
-rw-rw-r--. 1 boris boris  1000000 Jul 21 21:33 24-B251220.txt
-rw-rw-r--. 1 boris boris 50000000 Jul 21 23:09 24-C251220.txt

Now test C++ performance via running

(.env) [boris@fedora33server DJS24251220]$ python MyAlpha.py
Input Letter you are concerned about : X
Input filename : 24-B251220.txt
U1618

(.env) [boris@fedora33server DJS24251220]$ python MyAlpha.py
Input Letter you are concerned about : X
Input filename : 24-C251220.txt
U80900

Then runtime for "C" file is around 1-2 sec.




















































Converting to C++ and refactoring #24 from Eugene Djobs Stream as of 12/25/2020

In the C++ procedure below we obtain the final answer as a C++ string, convert that string to a pointer (say c) to "const char", and finally return the required value to the Python runtime module via the pointer to PyObject provided by PyUnicode_FromString(c).

    std::string answer = maxChar + std::to_string(maxFreq);
    const char *ars = answer.c_str();
    PyObject *result = PyUnicode_FromString(ars);
    return result;

Original task sounds like 

The text file contains only capital letters of the Latin alphabet (ABC ... Z). Identify the most common character in the file immediately after the letter X. In the answer, write down this symbol first, and then immediately (no separator) how many times it occurs after the letter X.

Python Code for this task 

# Reference pure-Python solution: for each line of the file, find the letter that most
# often appears immediately after 'X'.
with open('24-B251220.txt') as f:

  lines = f.readlines()

  for line in lines:

     # The counters are re-initialised for every line, so the values printed at the end
     # describe the last line of the file only.
     letters = 'QWERTYUIOPASDFGHJKLZXCVBNM'

     count = [0]*26

     maxChar, maxFreq = ' ', 0

     # One full pass over the line per candidate letter (26 passes in total).
     for i in range(26):

        for j in range(1, len(line)):

           if line[j-1] == 'X' and line[j] == letters[i]:

              count[i] += 1

     # Pick the highest count; on a tie the letter that comes first in `letters` wins.
     for i in range(26):

       if  maxFreq < count[i]:

        maxFreq = count[i]

        maxChar = letters[i]

# Prints the letter and its count separated by a space.
print(maxChar,maxFreq)

where 24-B251220.txt is just a row of length 1000000 bytes

***********************************************
If we append 24-B251220.txt to 24-C251220.txt 50 times (24-B251220.txt >> 24-C251220.txt), then we have to wait a while for the Python module above to exit
***********************************************
Create following test.cpp file for Python to C++ extension :

#include <iostream>
#include <fstream>
#include <string>
#include <cstddef>
#include <Python.h>
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wreturn-type"

extern "C"{}
namespace {

static PyObject* XFrecuencyLetter(PyObject* self, PyObject* args)
{
   char *filename = NULL;
   int idx = 0;

   if(!PyArg_ParseTuple(args, "is", &idx, &filename)) {
        return NULL;
    }

   std::string letters = "QWERTYUIOPASDFGHJKLZXCVBNM";
   char alpha[27] =  "QWERTYUIOPASDFGHJKLZXCVBNM";
   int count [26] = {};
   int maxFreq = 0;
   char maxChar;
   std::fstream newfile;
   newfile.open(filename,std::ios::in);

   if (newfile.is_open()){
   std::string tp;
    while (getline(newfile, tp))
     {
      for(int i=1; i < 26;i++)
      {
       for(int j=1; j <= tp.size(); j++)
        {
          if ( tp.at(j-1) == alpha[idx]  && tp.at(j) == letters.at(i))
              count[i] += 1;
        }
      }
      for(int i=1; i < 26;i++)
      {
        if (maxFreq < count[i])
         {
          maxFreq = count[i];
          maxChar = letters[i];
         }
      }
     }
    std::string answer = maxChar + std::to_string(maxFreq);
    const char *ars = answer.c_str();
    PyObject *result = PyUnicode_FromString(ars);
    newfile.close();
    return result;
   }
};

static PyMethodDef myMethods[] = {
    { "XFrecuencyLetter",XFrecuencyLetter,METH_VARARGS, "Prints frequency letter" },
    { NULL, NULL, 0, NULL }
};

static struct PyModuleDef myModule = {
    PyModuleDef_HEAD_INIT,
    "myModule",
    "Test Module",
    -1,
    myMethods
};

PyMODINIT_FUNC PyInit_myModule(void)
{
    return PyModule_Create(&myModule);
};
} //namespace


Now create MyAlpha.py as follows, to be able to analyze the question for any letter other than 'X' as well

(.env) [boris@fedora33server DJS24251220]$ cat MyAlpha.py
import myModule
Z = input("Input Letter you are concerned about : ")
# The letter order must match the table inside the C++ extension, because only the
# position of the chosen letter is passed across. (The entry for 'U' used to be the
# typo ',U', which made the letter U impossible to select.)
alpha = list('QWERTYUIOPASDFGHJKLZXCVBNM')
# Raises ValueError if the input is not exactly one of the 26 capital letters.
index = alpha.index(Z)
file = input("Input filename : " )
print(myModule.XFrecuencyLetter(index,file))

******************************************
Build and install required shared library
******************************************
(.env) [boris@fedora33server DJS24251220]$ python setup.py install
running install
running build
running build_ext
building 'myModule' extension
creating build
creating build/temp.linux-x86_64-3.9
gcc -pthread -Wno-unused-result -Wsign-compare -DDYNAMIC_ANNOTATIONS_ENABLED=1 -DNDEBUG -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -fPIC -I/home/boris/DJS24251220/.env/include -I/usr/include/python3.9 -c test.cpp -o build/temp.linux-x86_64-3.9/test.o
creating build/lib.linux-x86_64-3.9
g++ -pthread -shared -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g build/temp.linux-x86_64-3.9/test.o -L/usr/lib64 -o build/lib.linux-x86_64-3.9/myModule.cpython-39-x86_64-linux-gnu.so
running install_lib
copying build/lib.linux-x86_64-3.9/myModule.cpython-39-x86_64-linux-gnu.so -> /home/boris/DJS24251220/.env/lib64/python3.9/site-packages
running install_egg_info
Removing /home/boris/DJS24251220/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info
Writing /home/boris/DJS24251220/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info

(.env) [boris@fedora33server DJS24251220]$ ll *.txt
-rw-rw-r--. 1 boris boris  1000000 Jul 21 21:33 24-B251220.txt
-rw-rw-r--. 1 boris boris 50000000 Jul 21 23:09 24-C251220.txt

Now test C++ performance via running

(.env) [boris@fedora33server DJS24251220]$ python MyAlpha.py
Input Letter you concerned about : X
Input filename : 24-B251220.txt
U1618

(.env) [boris@fedora33server DJS24251220]$ python MyAlpha.py
Input Letter you concerned about : X
Input filename : 24-C251220.txt
U80900

Then runtime for "C" file is around 1-2 sec.