I was doing a problem, SPOJ: FLIB. My code was running slow (getting Time Limit Exceeded error) when I was using Code1.
However, my code accepted within the required time limit when I hardcoded the multiplication instead, using Code2. Can someone tell how this was a significant change? Note that this program relies heavily on multiplication, and it is the performed repeatedly.
The question is on matrix exponentiation, and multiplication of two matrices is a very crucial aspect in it.
A[3][3] and B[3][3] are two matrices which are multiplied, and whose result is stored in C[3][3]. M is an int with which I am supposed to take the modulus. M = 10^9 + 7. s is of type long long, and I have used to it to reduce the number of times I have to apply the modulus operator.
Each element in these matrices can be of the order of \$10^9\$, after taking the modulus.
Code1:
for(i=0;i<3;i++)
for(j=0;j<3;j++)
C[i][j] = 0;
for(i=0;i<3;i++)
for(j=0;j<3;j++)
{
s=0;
for(k=0;k<3;k++)
s += ((long long)(A[i][k]))*B[k][j];
s = s%M;
C[i][j] = s;
}
Code2:
C[0][0]= (((long long)A[0][0])*B[0][0] + ((long long)A[0][1])*B[1][0] + ((long long)A[0][2])*B[2][0])%M;
C[0][1]= (((long long)A[0][0])*B[0][1] + ((long long)A[0][1])*B[1][1] + ((long long)A[0][2])*B[2][1])%M;
C[0][2]= (((long long)A[0][0])*B[0][2] + ((long long)A[0][1])*B[1][2] + ((long long)A[0][2])*B[2][2])%M;
C[1][0]= (((long long)A[1][0])*B[0][0] + ((long long)A[1][1])*B[1][0] + ((long long)A[1][2])*B[2][0])%M;
C[1][1]= (((long long)A[1][0])*B[0][1] + ((long long)A[1][1])*B[1][1] + ((long long)A[1][2])*B[2][1])%M;
C[1][2]= (((long long)A[1][0])*B[0][2] + ((long long)A[1][1])*B[1][2] + ((long long)A[1][2])*B[2][2])%M;
C[2][0]= (((long long)A[2][0])*B[0][0] + ((long long)A[2][1])*B[1][0] + ((long long)A[2][2])*B[2][0])%M;
C[2][1]= (((long long)A[2][0])*B[0][1] + ((long long)A[2][1])*B[1][1] + ((long long)A[2][2])*B[2][1])%M;
C[2][2]= (((long long)A[2][0])*B[0][2] + ((long long)A[2][1])*B[1][2] + ((long long)A[2][2])*B[2][2])%M;
This is the complete code:
#include <bits/stdc++.h>
using namespace std;
#define M 1000000007
void matmult(int A[3][3], int B[3][3], int C[3][3])
{
C[0][0]= (((long long)A[0][0])*B[0][0] + ((long long)A[0][1])*B[1][0] + ((long long)A[0][2])*B[2][0])%M;
C[0][1]= (((long long)A[0][0])*B[0][1] + ((long long)A[0][1])*B[1][1] + ((long long)A[0][2])*B[2][1])%M;
C[0][2]= (((long long)A[0][0])*B[0][2] + ((long long)A[0][1])*B[1][2] + ((long long)A[0][2])*B[2][2])%M;
C[1][0]= (((long long)A[1][0])*B[0][0] + ((long long)A[1][1])*B[1][0] + ((long long)A[1][2])*B[2][0])%M;
C[1][1]= (((long long)A[1][0])*B[0][1] + ((long long)A[1][1])*B[1][1] + ((long long)A[1][2])*B[2][1])%M;
C[1][2]= (((long long)A[1][0])*B[0][2] + ((long long)A[1][1])*B[1][2] + ((long long)A[1][2])*B[2][2])%M;
C[2][0]= (((long long)A[2][0])*B[0][0] + ((long long)A[2][1])*B[1][0] + ((long long)A[2][2])*B[2][0])%M;
C[2][1]= (((long long)A[2][0])*B[0][1] + ((long long)A[2][1])*B[1][1] + ((long long)A[2][2])*B[2][1])%M;
C[2][2]= (((long long)A[2][0])*B[0][2] + ((long long)A[2][1])*B[1][2] + ((long long)A[2][2])*B[2][2])%M;
}
void matpow(int Z[3][3], long long n, int A[3][3])
{
int temp[3][3];
int i,j;
A[0][0] = 1;
A[0][1] = 0;
A[0][2] = 0;
A[1][0] = 0;
A[1][1] = 1;
A[1][2] = 0;
A[2][0] = 0;
A[2][1] = 0;
A[2][2] = 1;
while(n>0)
{
if(n&1)
{
matmult(A,Z,temp);
for(i=0;i<3;i++)
for(j=0;j<3;j++)
A[i][j] = temp[i][j];
}
matmult(Z, Z, temp);
for(i=0;i<3;i++)
for(j=0;j<3;j++)
Z[i][j] = temp[i][j];
n/=2;
}
}
int main(int argc, const char * argv[])
{
ios_base::sync_with_stdio(false);
cin.tie(NULL);
int t;
long long n,ans = 0;
int A[3][3];
cin>>t;
while(t--)
{
cin>>n;
int Z[3][3] = {{1,2,1},{0,5,3},{0,3,2}};
if(n>1)
{
matpow(Z,n-1,A);
ans = ((long long)(A[0][0])*2 + (long long)(A[0][1])*5 + (long long)A[0][2]*3)%M;
}
else
{
if(n==0)
ans = 0;
else if(n==1)
ans = 2;
else if(n==2)
ans = 15;
}
cout<<ans<<"\n";
}
return 0;
}
A,B,C,M, andsare. Also, what are typical values for each? \$\endgroup\$