Friday, July 23, 2021

Converting to C++ and refactoring #24 from Eugene Djobs Stream as of 12/25/2020

In the C++ procedure below we build the final answer as a C++ string, convert it to a pointer to "const char" (say c), and finally return the required value to the Python runtime as the pointer to PyObject produced by PyUnicode_FromString(c).

    // Build the answer text: the winning letter immediately followed by its count.
    std::string answer = maxChar + std::to_string(maxFreq);
    // Pointer to the string's character buffer; it is taken from `answer`,
    // so it is only used while `answer` is still in scope.
    const char *ars = answer.c_str();
    // Wrap the text in a Python object that is handed back to the caller.
    PyObject *result = PyUnicode_FromString(ars);
    return result;

The original task reads as follows:

The text file contains only capital letters of the Latin alphabet (ABC ... Z). Identify the most common character in the file immediately after the letter X. In the answer, write down this symbol first, and then immediately (no separator) how many times it occurs after the letter X.

Python Code for this task 

# Find the letter that most often follows 'X' in the file and print it
# immediately followed by the number of such occurrences (no separator).

# Candidate letters in keyboard order; on equal counts the earlier one wins.
letters = 'QWERTYUIOPASDFGHJKLZXCVBNM'

# One counter per candidate letter, accumulated over the whole file.
count = dict.fromkeys(letters, 0)

with open('24-B251220.txt') as f:
    for line in f:
        # Walk every adjacent pair once instead of rescanning per letter.
        for prev, cur in zip(line, line[1:]):
            if prev == 'X' and cur in count:
                count[cur] += 1

# Defaults are defined before the scan so an empty file still prints " 0".
maxChar, maxFreq = ' ', 0
for letter in letters:
    if maxFreq < count[letter]:
        maxFreq = count[letter]
        maxChar = letter

# The task asks for the letter and the count with no separator between them.
print(maxChar, maxFreq, sep='')

where 24-B251220.txt is a single line 1000000 bytes long

***********************************************
If we append 24-B251220.txt to 24-C251220.txt 50 times (24-B251220.txt >> 24-C251220.txt), the Python module above takes a noticeable while to finish on the resulting file
***********************************************
Create the following test.cpp file for the Python C++ extension:

#include <iostream>
#include <fstream>
#include <string>
#include <cstddef>
#include <Python.h>
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wreturn-type"

extern "C"{}
namespace {

static PyObject* XFrecuencyLetter(PyObject* self, PyObject* args)
{
   char *filename = NULL;
   int idx = 0;

   if(!PyArg_ParseTuple(args, "is", &idx, &filename)) {
        return NULL;
    }

   std::string letters = "QWERTYUIOPASDFGHJKLZXCVBNM";
   char alpha[27] =  "QWERTYUIOPASDFGHJKLZXCVBNM";
   int count [26] = {};
   int maxFreq = 0;
   char maxChar;
   std::fstream newfile;
   newfile.open(filename,std::ios::in);

   if (newfile.is_open()){
   std::string tp;
    while (getline(newfile, tp))
     {
      for(int i=1; i < 26;i++)
      {
       for(int j=1; j <= tp.size(); j++)
        {
          if ( tp.at(j-1) == alpha[idx]  && tp.at(j) == letters.at(i))
              count[i] += 1;
        }
      }
      for(int i=1; i < 26;i++)
      {
        if (maxFreq < count[i])
         {
          maxFreq = count[i];
          maxChar = letters[i];
         }
      }
     }
    std::string answer = maxChar + std::to_string(maxFreq);
    const char *ars = answer.c_str();
    PyObject *result = PyUnicode_FromString(ars);
    newfile.close();
    return result;
   }
};

// Table of functions this module exposes to Python; the all-null entry
// marks the end of the table.
static PyMethodDef myMethods[] = {
    {
        "XFrecuencyLetter",        // name used from Python
        XFrecuencyLetter,          // C++ function that implements it
        METH_VARARGS,              // calling-convention flag
        "Prints frequency letter"  // docstring
    },
    { nullptr, nullptr, 0, nullptr }
};

static struct PyModuleDef myModule = {
    PyModuleDef_HEAD_INIT,
    "myModule",
    "Test Module",
    -1,
    myMethods
};

// Module initialisation function: builds the module object from the
// `myModule` definition and returns it.
// NOTE(review): this function is defined inside the unnamed namespace that
// encloses this file's definitions - confirm the symbol is still exported
// by every compiler you target.
PyMODINIT_FUNC PyInit_myModule(void)
{
    PyObject *module = PyModule_Create(&myModule);
    return module;
}
} //namespace


Now create MyAlpha.py as follows, so that the question can be analyzed for any letter, not only 'X':

(.env) [boris@fedora33server DJS24251220]$ cat MyAlpha.py
import myModule

# Keyboard-order alphabet. The extension receives the chosen letter as an
# index into this sequence, so the order must match the table in test.cpp.
alpha = ['Q','W','E','R','T','Y','U','I','O','P','A','S','D','F','G','H','J','K','L','Z','X','C','V','B','N','M']

# Tolerate surrounding blanks and lower-case input.
Z = input("Input Letter you are concerned about : ").strip().upper()
if Z not in alpha:
    raise SystemExit("Expected a single capital Latin letter (A-Z)")
index = alpha.index(Z)
file = input("Input filename : " )
print(myModule.XFrecuencyLetter(index,file))

******************************************
Build and install required shared library
******************************************
(.env) [boris@fedora33server DJS24251220]$ python setup.py install
running install
running build
running build_ext
building 'myModule' extension
creating build
creating build/temp.linux-x86_64-3.9
gcc -pthread -Wno-unused-result -Wsign-compare -DDYNAMIC_ANNOTATIONS_ENABLED=1 -DNDEBUG -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -fPIC -I/home/boris/DJS24251220/.env/include -I/usr/include/python3.9 -c test.cpp -o build/temp.linux-x86_64-3.9/test.o
creating build/lib.linux-x86_64-3.9
g++ -pthread -shared -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g build/temp.linux-x86_64-3.9/test.o -L/usr/lib64 -o build/lib.linux-x86_64-3.9/myModule.cpython-39-x86_64-linux-gnu.so
running install_lib
copying build/lib.linux-x86_64-3.9/myModule.cpython-39-x86_64-linux-gnu.so -> /home/boris/DJS24251220/.env/lib64/python3.9/site-packages
running install_egg_info
Removing /home/boris/DJS24251220/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info
Writing /home/boris/DJS24251220/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info

(.env) [boris@fedora33server DJS24251220]$ ll *.txt
-rw-rw-r--. 1 boris boris  1000000 Jul 21 21:33 24-B251220.txt
-rw-rw-r--. 1 boris boris 50000000 Jul 21 23:09 24-C251220.txt

Now test the C++ performance by running:

(.env) [boris@fedora33server DJS24251220]$ python MyAlpha.py
Input Letter you are concerned about : X
Input filename : 24-B251220.txt
U1618

(.env) [boris@fedora33server DJS24251220]$ python MyAlpha.py
Input Letter you are concerned about : X
Input filename : 24-C251220.txt
U80900

The runtime for the "C" file (24-C251220.txt) is then around 1-2 seconds.










































































No comments:

Post a Comment